[Piglit] [Patch v7 12/13] framework: Update results to use version numbers

Dylan Baker baker.dylan.c at gmail.com
Mon Jun 23 16:38:32 PDT 2014


This patch updates our json to version 1. Changes from version 0 to
version 1 are as follows:

- renamed 'main' to 'results.json'
- dmesg must be a string (it was stored as a list in some version 0
  results)
- subtests are never stored as duplicate entries; a single instance of
  the test is recorded (in version 0 both are possible)
- there is no info entry in version 1; err, out, and returncode are
  always split into separate entries (see the illustration below)
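
To make the subtest and info changes concrete, here is a rough sketch of
the same entry in both formats (the names and values below are invented
for illustration, they are not taken from real results):

    version 0 ('main', duplicate entries, combined info):
      "group/test/subtest 1": {
          "info": "Returncode: 0\n\nErrors:...\n\nOutput:...",
          "subtest": {"subtest 1": "pass", "subtest 2": "pass"}, ...}
      "group/test/subtest 2": { ...identical copy of the entry above... }

    version 1 ('results.json', one entry per test, split fields):
      "group/test": {
          "returncode": 0, "err": "...", "out": "...",
          "subtest": {"subtest 1": "pass", "subtest 2": "pass"}, ...}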

This patch adds support to the results module for handling updates to
the results in a sane way. It does this by adding a results_version
attribute to the TestrunResult (which is stored as json), and
implementing the ability to incrementally update results between
versions.

The update happens automatically on load and is non-destructive: the old
results are moved to results.json.old and the updated results are written
to disk, so the update is a one-time cost.
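
As a rough sketch of what callers see after this change (the directory
path below is just a placeholder for the example), loading version 0
results now transparently yields updated results:

    import framework.results as results

    # Loading a version 0 result directory (one containing 'main')
    # updates it in place: 'main' is renamed to 'results.json.old' and
    # the updated data is written out as 'results.json'.
    testrun = results.load_results('/path/to/old-results')
    assert testrun.results_version == results.CURRENT_JSON_VERSION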

v2: - Handle non-writable directories and files, this also fixes using
      file-descriptors as inputs
v4: - Fix bug that caused update_results to always return early
    - Fix bug that caused tests with one subtest to have their name
      mangled
    - Set TestrunResult.results_version by default, set it to None
      during load if the results file doesn't have a results_version
      value. Otherwise results could end up with no version when they
      should have one
    - Fix a bug that raises an uncaught exception when a test's info has
      more elements than the expected 3 (mostly glean)
    - split the version 0 to 1 tests out of the results_tests module.
      There are enough tests and enough data specific to these tests it
      makes more sense to make a separate test module
v5: - Fix handling of subtest with / in their name
v6: - Properly handle results that already use one entry per test
v7: - set TestrunResult.results_version to 0 instead of None when loading
      results that don't have a version
    - Use a function to loop over the update functions instead of
      writing each one out in a long list
    - Add two more framework tests that test results.update_results()
    - Remove some unnecessarily careful variable checking; we have
      assurances earlier in the code path that the values we expect are
      there
    - update comments in _update_results() to include expected input and
      output
    - don't compare dicts with .iterkeys()
    - rework info splitting to be more robust

Signed-off-by: Dylan Baker <baker.dylan.c at gmail.com>
---
 framework/programs/run.py           |   4 +-
 framework/results.py                | 195 ++++++++++++++++++++++++++++++++--
 framework/tests/results_tests.py    |  62 ++++++++++-
 framework/tests/results_v0_tests.py | 206 ++++++++++++++++++++++++++++++++++++
 framework/tests/utils.py            |   2 +
 5 files changed, 453 insertions(+), 16 deletions(-)
 create mode 100644 framework/tests/results_v0_tests.py

diff --git a/framework/programs/run.py b/framework/programs/run.py
index 4a500bd..eea0a60 100644
--- a/framework/programs/run.py
+++ b/framework/programs/run.py
@@ -158,7 +158,7 @@ def run(input_):
         results.name = path.basename(args.results_path)
 
     # Begin json.
-    result_filepath = path.join(args.results_path, 'main')
+    result_filepath = path.join(args.results_path, 'results.json')
     result_file = open(result_filepath, 'w')
     json_writer = framework.results.JSONWriter(result_file)
 
@@ -212,7 +212,7 @@ def resume(input_):
     if results.options.get('platform'):
         opts.env['PIGLIT_PLATFORM'] = results.options['platform']
 
-    results_path = path.join(args.results_path, "main")
+    results_path = path.join(args.results_path, 'results.json')
     json_writer = framework.results.JSONWriter(open(results_path, 'w+'))
     json_writer.initialize_json(results.options, results.name,
                                 core.collect_system_info())
diff --git a/framework/results.py b/framework/results.py
index 5f0316a..ce1319d 100644
--- a/framework/results.py
+++ b/framework/results.py
@@ -23,6 +23,7 @@
 
 from __future__ import print_function
 import os
+import sys
 from cStringIO import StringIO
 try:
     import simplejson as json
@@ -39,6 +40,9 @@ __all__ = [
     'load_results',
 ]
 
+# The current version of the JSON results
+CURRENT_JSON_VERSION = 1
+
 
 def _piglit_encoder(obj):
     """ Encoder for piglit that can transform additional classes into json
@@ -129,7 +133,7 @@ class JSONWriter(object):
         self._open_containers = []
 
     def initialize_json(self, options, name, env):
-        """ Write boilerplate json code 
+        """ Write boilerplate json code
 
         This writes all of the json except the actual tests.
 
@@ -142,6 +146,7 @@ class JSONWriter(object):
 
         """
         self.open_dict()
+        self.write_dict_item('results_version', CURRENT_JSON_VERSION)
         self.write_dict_item('name', name)
 
         self.write_dict_key('options')
@@ -255,6 +260,7 @@ class TestrunResult(object):
                                 'wglinfo',
                                 'glxinfo',
                                 'lspci',
+                                'results_version',
                                 'time_elapsed']
         self.name = None
         self.uname = None
@@ -262,6 +268,7 @@ class TestrunResult(object):
         self.glxinfo = None
         self.lspci = None
         self.time_elapsed = None
+        self.results_version = CURRENT_JSON_VERSION
         self.tests = {}
 
         if resultfile:
@@ -272,6 +279,9 @@ class TestrunResult(object):
             except ValueError:
                 raw_dict = json.load(self.__repair_file(resultfile))
 
+            # If there is no results version in the json, set it to zero
+            self.results_version = raw_dict.get('results_version', 0)
+
             # Check that only expected keys were unserialized.
             for key in raw_dict:
                 if key not in self.serialized_keys:
@@ -364,12 +374,179 @@ def load_results(filename):
     "main"
 
     """
+    # This will load any file or file-like thing. That would include pipes and
+    # file descriptors
+    if not os.path.isdir(filename):
+        filepath = filename
+    else:
+        # If there are both old and new results in a directory pick the new
+        # ones first
+        if os.path.exists(os.path.join(filename, 'results.json')):
+            filepath = os.path.join(filename, 'results.json')
+        # Version 0 results are called 'main'
+        elif os.path.exists(os.path.join(filename, 'main')):
+            filepath = os.path.join(filename, 'main')
+        else:
+            raise Exception("No results found")
+
+    with open(filepath, 'r') as f:
+        testrun = TestrunResult(f)
+
+    return update_results(testrun, filepath)
+
+
+def update_results(results, filepath):
+    """ Update results to the lastest version
+
+    This function is a wrapper for other update_* functions, providing
+    incremental updates from one version to another.
+
+    Arguments:
+    results -- a TestrunResult instance
+    filepath -- the name of the file that the TestrunResult instance was
+                created from
+
+    """
+
+    def loop_updates(results):
+        """ Helper to select the proper update sequence """
+        # Python lacks a switch statement, the workaround is to use a
+        # dictionary
+        updates = {
+            0: _update_zero_to_one,
+        }
+
+        while results.results_version < CURRENT_JSON_VERSION:
+            results = updates[results.results_version](results)
+
+        return results
+
+    # If the results version is the current version there is no need to
+    # update, just return the results
+    if results.results_version == CURRENT_JSON_VERSION:
+        return results
+
+    results = loop_updates(results)
+
+    # Move the old results, and write the current results
+    filedir = os.path.dirname(filepath)
     try:
-        with open(filename, 'r') as resultsfile:
-            testrun = TestrunResult(resultsfile)
-    except IOError:
-        with open(os.path.join(filename, "main"), 'r') as resultsfile:
-            testrun = TestrunResult(resultsfile)
-
-    assert testrun.name is not None
-    return testrun
+        os.rename(filepath, os.path.join(filedir, 'results.json.old'))
+        results.write(os.path.join(filedir, 'results.json'))
+    except OSError:
+        print("WARNING: Could not write updated results {}".format(filepath),
+              file=sys.stderr)
+
+    return results
+
+
+def _update_zero_to_one(results):
+    """ Update version zero results to version 1 results
+
+    Changes from version 0 to version 1
+
+    - dmesg is sometimes stored as a list, sometimes stored as a string. In
+      version 1 it is always stored as a string
+    - in version 0 subtests are sometimes stored as duplicates, sometimes
+      stored only with a single entry; in version 1 tests with subtests are
+      always recorded only once
+    - version 0 can have an info entry, or returncode, out, and err entries;
+      version 1 will only have the latter
+    - version 0 results are called 'main', while version 1 results are called
+      'results.json' (this is not handled internally; it's either handled by
+      update_results(), which will write the file back to disk, or needs to
+      be handled manually by the user)
+
+    """
+    updated_results = {}
+    remove = set()
+
+    for name, test in results.tests.iteritems():
+        # fix dmesg errors if any
+        if isinstance(test.get('dmesg'), list):
+            test['dmesg'] = '\n'.join(test['dmesg'])
+
+        # If a test has an info attribute we want to remove it, but if it
+        # doesn't have a returncode, out, or err attribute we'll want to get
+        # those out of info first
+        #
+        # This expects that the order of info is roughly returncode, errors,
+        # output, *extra; it can handle extra information in the middle.
+        if (None in [test.get('out'), test.get('err'), test.get('returncode')]
+                and test.get('info')):
+
+            # This attempts to split everything before Errors: into
+            # returncode, everything before Output: into err, and everything
+            # else into out. This may result in extra info ending up in out,
+            # which is fine since out is only read by humans.
+            returncode, split = test['info'].split('\n\nErrors:')
+            err, out = split.split('\n\nOutput:')
+
+            # returncode can be 0, and 0 is falsy, so ensure it is actually
+            # None
+            if test.get('returncode') is None:
+                # In some cases the returncode might not be set (e.g. the
+                # test was skipped); in that case it will be None, so set it
+                # appropriately
+                try:
+                    test['returncode'] = int(
+                        returncode[len('returncode: '):].strip())
+                except ValueError:
+                    test['returncode'] = None
+            if not test.get('err'):
+                test['err'] = err.strip()
+            if not test.get('out'):
+                test['out'] = out.strip()
+
+        # Remove the unused info key
+        if test.get('info'):
+            del test['info']
+
+        # If a test has more than one subtest, version 0 results will contain
+        # a complete copy of the entry for each subtest, with '/{subtest}'
+        # appended to the test name. This loop looks for tests with subtests,
+        # removes the duplicate entries, and creates a single entry for the
+        # full test in updated_results.
+        #
+        # this must be the last thing done in this loop, or there will be pain
+        if test.get('subtest'):
+            for sub in test['subtest'].iterkeys():
+                # adding the leading / ensures that we get exactly what we
+                # expect, since endswith does a character by character match;
+                # if the subtest name is duplicated it won't match, and if
+                # there are more trailing characters it will not match
+                #
+                # We expect duplicate names like this:
+                #  "group1/groupA/test1/subtest 1": <thing>,
+                #  "group1/groupA/test1/subtest 2": <thing>,
+                #  "group1/groupA/test1/subtest 3": <thing>,
+                #  "group1/groupA/test1/subtest 4": <thing>,
+                #  "group1/groupA/test1/subtest 5": <thing>,
+                #  "group1/groupA/test1/subtest 6": <thing>,
+                #  "group1/groupA/test1/subtest 7": <thing>,
+                # but what we want is group1/groupA/test1 and none of the
+                # subtests as keys in the dictionary at all
+                if name.endswith('/{0}'.format(sub)):
+                    testname = name[:-(len(sub) + 1)]  # strip '/' + subtest
+                    assert testname[-1] != '/'
+
+                    remove.add(name)
+                    break
+            else:
+                # This handles two cases: either the results have only single
+                # entries for each test regardless of subtests (new style),
+                # or the test only has a single subtest and thus was recorded
+                # correctly
+                testname = name
+
+            if testname not in updated_results:
+                updated_results[testname] = test
+
+    for name in remove:
+        del results.tests[name]
+    results.tests.update(updated_results)
+
+    # set the results version
+    results.results_version = 1
+
+    return results
diff --git a/framework/tests/results_tests.py b/framework/tests/results_tests.py
index b31c505..4afa810 100644
--- a/framework/tests/results_tests.py
+++ b/framework/tests/results_tests.py
@@ -67,21 +67,28 @@ def test_initialize_jsonwriter():
         assert isinstance(func, results.JSONWriter)
 
 
-def test_load_results_folder():
+def test_load_results_folder_as_main():
     """ Test that load_results takes a folder with a file named main in it """
     with utils.tempdir() as tdir:
         with open(os.path.join(tdir, 'main'), 'w') as tfile:
             tfile.write(json.dumps(utils.JSON_DATA))
 
-        results_ = results.load_results(tdir)
-        assert results_
+        results.load_results(tdir)
+
+
+def test_load_results_folder():
+    """ Test that load_results takes a folder with a file named results.json """
+    with utils.tempdir() as tdir:
+        with open(os.path.join(tdir, 'results.json'), 'w') as tfile:
+            tfile.write(json.dumps(utils.JSON_DATA))
+
+        results.load_results(tdir)
 
 
 def test_load_results_file():
     """ Test that load_results takes a file """
     with utils.resultfile() as tfile:
-        results_ = results.load_results(tfile.name)
-        assert results_
+        results.load_results(tfile.name)
 
 
 def test_testresult_to_status():
@@ -107,3 +114,48 @@ def test_testrunresult_write():
             new = results.load_results(os.path.join(tdir, 'results.json'))
 
     nt.assert_dict_equal(result.__dict__, new.__dict__)
+
+
+def test_update_results_current():
+    """ update_results() returns early when the results_version is current """
+    data = utils.JSON_DATA.copy()
+    data['results_version'] = results.CURRENT_JSON_VERSION
+
+    with utils.tempdir() as d:
+        with open(os.path.join(d, 'main'), 'w') as f:
+            json.dump(data, f)
+
+        with open(os.path.join(d, 'main'), 'r') as f:
+            base = results.TestrunResult(f)
+
+        res = results.update_results(base, f.name)
+
+    nt.assert_dict_equal(res.__dict__, base.__dict__)
+
+
+def test_update_results_old():
+    """ update_results() updates results
+
+    Because of the design of our updates (namely that they silently and
+    incrementally update from x to y) it's impossible to know exactly what
+    we'll get at the end without tests that have to be reworked each time
+    updates are run. Since there already is (at least for v0 -> v1) a fairly
+    comprehensive set of tests, this test only checks that results_version
+    has been set to CURRENT_JSON_VERSION (which is one of the effects of
+    running update_results()), with the assumption that there is sufficient
+    other testing of the update process.
+
+    """
+    data = utils.JSON_DATA.copy()
+    data['results_version'] = 0
+
+    with utils.tempdir() as d:
+        with open(os.path.join(d, 'main'), 'w') as f:
+            json.dump(data, f)
+
+        with open(os.path.join(d, 'main'), 'r') as f:
+            base = results.TestrunResult(f)
+
+        res = results.update_results(base, f.name)
+
+    nt.assert_equal(res.results_version, results.CURRENT_JSON_VERSION)
diff --git a/framework/tests/results_v0_tests.py b/framework/tests/results_v0_tests.py
new file mode 100644
index 0000000..fd8be26
--- /dev/null
+++ b/framework/tests/results_v0_tests.py
@@ -0,0 +1,206 @@
+# Copyright (c) 2014 Intel Corporation
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+""" Module provides tests for converting version zero results to version 1 """
+
+import json
+import copy
+import nose.tools as nt
+import framework.results as results
+import framework.tests.utils as utils
+
+
+DATA = copy.deepcopy(utils.JSON_DATA)
+DATA['tests']['sometest']['dmesg'] = ['this', 'is', 'dmesg']
+DATA['tests']['sometest']['info'] = \
+    'Returncode: 1\n\nErrors:stderr\n\nOutput: stdout\n'
+DATA['tests'].update({
+    'group1/groupA/test/subtest 1': {
+        'info': 'Returncode: 1\n\nErrors:stderr\n\nOutput:stdout\n',
+        'subtest': {
+            'subtest 1': 'pass',
+            'subtest 2': 'pass'
+        },
+        'returncode': 0,
+        'command': 'this is a command',
+        'result': 'pass',
+        'time': 0.1
+    },
+    'group1/groupA/test/subtest 2': {
+        'info': 'Returncode: 1\n\nErrors:stderr\n\nOutput:stdout\n',
+        'subtest': {
+            'subtest 1': 'pass',
+            'subtest 2': 'pass'
+        },
+        'returncode': 0,
+        'command': 'this is a command',
+        'result': 'pass',
+        'time': 0.1
+    },
+    'single/test/thing': {
+        'info': 'Returncode: 1\n\nErrors:stderr\n\nOutput:stdout\n',
+        'subtest': {
+            'subtest 1': 'pass',
+        },
+        'returncode': 0,
+        'command': 'this is a command',
+        'result': 'pass',
+        'time': 0.1
+    },
+    'group2/groupA/test/subtest 1/depth': {
+        'info': 'Returncode: 1\n\nErrors:stderr\n\nOutput:stdout\n',
+        'subtest': {
+            'subtest 1/depth': 'pass',
+            'subtest 2/float': 'pass'
+        },
+        'returncode': 0,
+        'command': 'this is a command',
+        'result': 'pass',
+        'time': 0.1
+    },
+    'group2/groupA/test/subtest 2/float': {
+        'info': 'Returncode: 1\n\nErrors:stderr\n\nOutput:stdout\n',
+        'subtest': {
+            'subtest 1/depth': 'pass',
+            'subtest 2/float': 'pass'
+        },
+        'returncode': 0,
+        'command': 'this is a command',
+        'result': 'pass',
+        'time': 0.1
+    },
+    'group3/groupA/test': {
+        'info': 'Returncode: 1\n\nErrors:stderr\n\nOutput:stdout\n',
+        'subtest': {
+            'subtest 1': 'pass',
+            'subtest 2': 'pass',
+            'subtest 3': 'pass',
+        },
+        'returncode': 0,
+        'command': 'this is a command',
+        'result': 'pass',
+        'time': 0.1
+    },
+})
+
+with utils.with_tempfile(json.dumps(DATA)) as f:
+    RESULT = results._update_zero_to_one(results.load_results(f))
+
+
+def test_dmesg():
+    """ version 1: dmesg is converted from a list to a string """
+    assert RESULT.tests['sometest']['dmesg'] == 'this\nis\ndmesg'
+
+
+def test_subtests_remove_duplicates():
+    """ Version 1: Removes duplicate entries """
+    assert 'group1/groupA/test/subtest 1' not in RESULT.tests
+    assert 'group1/groupA/test/subtest 2' not in RESULT.tests
+
+
+def test_subtests_add_test():
+    """ Version 1: Add an entry for the actual test """
+    assert RESULT.tests.get('group1/groupA/test')
+
+
+def test_subtests_test_is_testresult():
+    """ Version 1: The result of the new test is a TestResult Instance """
+    assert isinstance(
+        RESULT.tests['group1/groupA/test'],
+        results.TestResult)
+
+
+def test_info_delete():
+    """ Version 1: Remove the info name from results """
+    for value in RESULT.tests.itervalues():
+        assert 'info' not in value
+
+
+def test_returncode_from_info():
+    """ Version 1: Use the returncode from info if there is no returncode """
+    assert RESULT.tests['sometest']['returncode'] == 1
+
+
+def test_returncode_no_override():
+    """ Version 1: Do not clobber returncode with info
+
+    The returncode from info should not overwrite an existing returncode
+    attribute; this test only checks that the value in info isn't used when
+    there is a value in returncode already
+
+    """
+    assert RESULT.tests['group1/groupA/test']['returncode'] != 1
+
+
+def test_err_from_info():
+    """ Version 1: add an err attribute from info """
+    assert RESULT.tests['group1/groupA/test']['err'] == 'stderr'
+
+
+def test_out_from_info():
+    """ Version 1: add an out attribute from info """
+    assert RESULT.tests['group1/groupA/test']['out'] == 'stdout'
+
+
+def test_set_version():
+    """ Version 1: Set the version to 1 """
+    assert RESULT.results_version == 1
+
+
+def test_dont_break_single_subtest():
+    """ Version 1: Don't break single subtest entries
+
+    A test with a single subtest was written correctly before; don't break it
+    by removing the name of the test. ex:
+    test/foo/bar: {
+        ...
+        subtest: {
+            1x1: pass
+        }
+    }
+
+    should remain test/foo/bar since bar is the name of the test not a subtest
+
+    """
+    assert RESULT.tests['single/test/thing']
+
+
+def test_info_split():
+    """ Version 1: info can split into any number of elements """
+    data = copy.copy(DATA)
+    data['tests']['sometest']['info'] = \
+        'Returncode: 1\n\nErrors:stderr\n\nOutput: stdout\n\nmore\n\nstuff'
+
+    with utils.with_tempfile(json.dumps(data)) as f:
+        results._update_zero_to_one(results.load_results(f))
+
+
+def test_subtests_with_slash():
+    """ Version 1: Subtest names with /'s are handled correctly """
+
+    expected = 'group2/groupA/test/subtest 1'
+    nt.assert_not_in(
+        expected, RESULT.tests.iterkeys(),
+        msg='{0} found in result, when it should not be'.format(expected))
+
+
+def test_handle_fixed_subtests():
+    """ Version 1: Correctly handle new single entry subtests correctly """
+    assert 'group3/groupA/test' in RESULT.tests.iterkeys()
diff --git a/framework/tests/utils.py b/framework/tests/utils.py
index f337b1e..252ced6 100644
--- a/framework/tests/utils.py
+++ b/framework/tests/utils.py
@@ -33,6 +33,7 @@ try:
     import simplejson as json
 except ImportError:
     import json
+import framework.results
 
 
 __all__ = [
@@ -49,6 +50,7 @@ JSON_DATA = {
         "filter": [],
         "exclude_filter": []
     },
+    "results_version": framework.results.CURRENT_JSON_VERSION,
     "name": "fake-tests",
     "lspci": "fake",
     "glxinfo": "fake",
-- 
2.0.0


