[Piglit] [Patch v6] framework: Update results to use version numbers

Ilia Mirkin imirkin at alum.mit.edu
Sun Jun 22 09:33:46 PDT 2014


On Sat, Jun 21, 2014 at 8:48 AM, Dylan Baker <baker.dylan.c at gmail.com> wrote:
> This patch updates our json to version 1. Changes from version 0 to
> version 1 are as follows:
>
> - renamed 'main' to 'results.json'
> - dmesg must be a string (It was stored as a list in some version 0
>   results)
> - subtests are never stored as duplicate entries, a single instance of
>   the test is recorded, (In version 0 both are possible)
> - there is no info entry in version 1, err, out, and returncode are
>   always split into separate entries
>
> This patch adds support to the results module for handling updates to
> the results in a sane way. It does this by adding a result_version
> attribute to the TestrunResult (which is stored as json), and
> implementing the ability to incrementally update results between
> versions.
>
> It does this automatically on load, non-destructively, moving the old
> results to results.json.old, but does write the updated results to disk,
> making the cost of this update a one time cost.
>
> v2: - Handle non-writable directories and files, this also fixes using
>       file-descriptors as inputs
> v4: - Fix bug that caused update_results to always return early
>     - Fix bug that caused tests with one subtest to have their name
>       mangled
>     - Set TestrunResult.results_version by default, set it to None
>       during load if the results file doesn't have a results_version
>       value. Otherwise results could end up with no version when they
>       should have one
>     - Fix a bug that raises an uncaught exception when a test's info has
>       more elements than the expected 3 (mostly glean)
>     - split the version 0 to 1 tests out of the results_tests module.
>       There are enough tests and enough data specific to these tests it
>       makes more sense to make a separate test module
> v5: - Fix handling of subtest with / in their name
> v6: - Properly handle results with proper one-entry-per-test
>
> Signed-off-by: Dylan Baker <baker.dylan.c at gmail.com>

Looks great overall, thanks so much for working on this! A few
comments below, but nothing too serious.

> ---
>
> I've also force pushed to my branch on github
>
>  framework/programs/run.py           |   4 +-
>  framework/results.py                | 157 +++++++++++++++++++++++++--
>  framework/tests/results_tests.py    |  17 ++-
>  framework/tests/results_v0_tests.py | 206 ++++++++++++++++++++++++++++++++++++
>  framework/tests/utils.py            |   2 +
>  5 files changed, 370 insertions(+), 16 deletions(-)
>  create mode 100644 framework/tests/results_v0_tests.py
>
> diff --git a/framework/programs/run.py b/framework/programs/run.py
> index 9255f64..ff13974 100644
> --- a/framework/programs/run.py
> +++ b/framework/programs/run.py
> @@ -157,7 +157,7 @@ def run(input_):
>          results.name = path.basename(args.results_path)
>
>      # Begin json.
> -    result_filepath = path.join(args.results_path, 'main')
> +    result_filepath = path.join(args.results_path, 'results.json')
>      result_file = open(result_filepath, 'w')
>      json_writer = framework.results.JSONWriter(result_file)
>
> @@ -212,7 +212,7 @@ def resume(input_):
>      if results.options.get('platform'):
>          env.env['PIGLIT_PLATFORM'] = results.options['platform']
>
> -    results_path = path.join(args.results_path, "main")
> +    results_path = path.join(args.results_path, 'results.json')
>      json_writer = framework.results.JSONWriter(open(results_path, 'w+'))
>      json_writer.initialize_json(results.options, results.name,
>                                  core.collect_system_info())
> diff --git a/framework/results.py b/framework/results.py
> index 1edc423..9005a4e 100644
> --- a/framework/results.py
> +++ b/framework/results.py
> @@ -23,6 +23,7 @@
>
>  from __future__ import print_function
>  import os
> +import sys
>  from cStringIO import StringIO
>  try:
>      import simplejson as json
> @@ -39,6 +40,9 @@ __all__ = [
>      'load_results',
>  ]
>
> +# The current version of the JSON results
> +CURRENT_JSON_VERSION = 1
> +
>
>  def _piglit_encoder(obj):
>      """ Encoder for piglit that can transform additional classes into json
> @@ -119,7 +123,7 @@ class JSONWriter(object):
>          self.__is_collection_empty = []
>
>      def initialize_json(self, options, name, env):
> -        """ Write boilerplate json code
> +        """ Write boilerplate json code
>
> +        This writes all of the json except the actual tests.
>
> @@ -132,6 +136,7 @@ class JSONWriter(object):
>
>          """
>          self.open_dict()
> +        self.write_dict_item('results_version', CURRENT_JSON_VERSION)
>          self.write_dict_item('name', name)
>
>          self.write_dict_key('options')
> @@ -229,6 +234,7 @@ class TestrunResult(object):
>                                  'wglinfo',
>                                  'glxinfo',
>                                  'lspci',
> +                                'results_version',
>                                  'time_elapsed']
>          self.name = None
>          self.uname = None
> @@ -236,6 +242,7 @@ class TestrunResult(object):
>          self.glxinfo = None
>          self.lspci = None
>          self.time_elapsed = None
> +        self.results_version = CURRENT_JSON_VERSION
>          self.tests = {}
>
>          if resultfile:
> @@ -246,6 +253,12 @@ class TestrunResult(object):
>              except ValueError:
>                  raw_dict = json.load(self.__repair_file(resultfile))
>
> +            # If the results don't have a results version set it to None,
> +            # otherwise they will pass through the update_results function
> +            # without being updated
> +            if not getattr(raw_dict, 'results_version', False):
> +                self.results_version = None

Wouldn't it make more sense to set it 0? Perhaps

self.results_version = getattr(raw_dict, 'results_version', 0)

> +
>              # Check that only expected keys were unserialized.
>              for key in raw_dict:
>                  if key not in self.serialized_keys:
> @@ -338,12 +351,138 @@ def load_results(filename):
>      "main"
>
>      """
> +    # This will load any file or file-like thing. That would include pipes and
> +    # file descriptors
> +    if not os.path.isdir(filename):
> +        filepath = filename
> +    else:
> +        # If there are both old and new results in a directory pick the new
> +        # ones first
> +        if os.path.exists(os.path.join(filename, 'results.json')):
> +            filepath = os.path.join(filename, 'results.json')
> +        # Version 0 results are called 'main'
> +        elif os.path.exists(os.path.join(filename, 'main')):
> +            filepath = os.path.join(filename, 'main')
> +        else:
> +            raise Exception("No results found")
> +
> +    with open(filepath, 'r') as f:
> +        testrun = TestrunResult(f)
> +
> +    return update_results(testrun, filepath)
> +
> +
> +def update_results(results, filepath):
> +    """ Update results to the lastest version
> +
> +    This function is a wrapper for other update_* functions, providing
> +    incremental updates from one version to another.
> +
> +    """
> +    # If the results version is the current version there is no need to
> +    # update, just return the results
> +    if getattr(results, 'results_version', 0) == CURRENT_JSON_VERSION:

Why are you so carefully retrieving the results version? You set it
explicitly in the TestrunResult constructor, no? (So you can just do
self.results_version)

> +        return results
> +
> +    # If there is no version then the results are version 0, and need to be
> +    # updated to version 1
> +    if getattr(results, 'results_version') is None:
> +        results = _update_zero_to_one(results)

While this is fine for now, perhaps future-proof this a bit by making
a list of such functions and auto-run-through them starting at the
results_version index. Or I could just be overengineering this... your
call.

> +
> +    # Move the old results, and write the current results
> +    filedir = os.path.dirname(filepath)
>      try:
> -        with open(filename, 'r') as resultsfile:
> -            testrun = TestrunResult(resultsfile)
> -    except IOError:
> -        with open(os.path.join(filename, "main"), 'r') as resultsfile:
> -            testrun = TestrunResult(resultsfile)
> -
> -    assert testrun.name is not None
> -    return testrun
> +        os.rename(filepath, os.path.join(filedir, 'results.json.old'))
> +        results.write(os.path.join(filedir, 'results.json'))
> +    except OSError:
> +        print("WARNING: Could not write updated results {}".format(filepath),
> +              file=sys.stderr)
> +
> +    return results
> +
> +
> +def _update_zero_to_one(results):
> +    """ Update version zero results to version 1 results
> +
> +    Changes from version 0 to version 1
> +
> +    - dmesg is sometimes stored as a list, sometimes stored as a string. In
> +      version 1 it is always stored as a string
> +    - in version 0 subtests are sometimes stored as duplicates, sometimes stored
> +      only with a single entry, in version 1 tests with subtests are only
> +      recorded once, always.
> +    - Version 0 can have an info entry, or returncode, out, and err entries,
> +      Version 1 will only have the latter
> +    - version 0 results are called 'main', while version 1 results are called
> +      'results.json' (This is not handled internally, it's either handled by
> +      update_results() which will write the file back to disk, or needs to be
> +      handled manually by the user)
> +
> +    """
> +    updated_results = {}
> +    remove = set()
> +
> +    for name, test in results.tests.iteritems():
> +        # fix dmesg errors if any
> +        if isinstance(test.get('dmesg'), list):
> +            test['dmesg'] = '\n'.join(test['dmesg'])
> +
> +        # If a test has an info attribute, we want to remove it; if it doesn't
> +        # have a returncode, out, or err attribute we'll want to get those out of
> +        # info first
> +        if (None in [test.get('out'), test.get('err'), test.get('returncode')]
> +                and test.get('info')):
> +            # Some tests put even more stuff in 'info', which means it cannot
> +            # be exploded (well, not in python2 anyway).
> +            # the values of split will be: returncode, stderr, stdout, *extra
> +            split = test['info'].split('\n\n')

What if it doesn't split itself into 3 things? Should probably check
for that. There's also the possibility that the test might print
multiple new lines on its own... if there are markers to search for,
that'd be mildly better. Looks like you can search for "\n\nOutput:"
etc.

> +
> +            # returncode can be 0, and 0 is falsy, so ensure it is actually
> +            # None
> +            if test.get('returncode') is None:
> +                # In some cases the returncode might not be set (like the test
> +                # skipped), in that case it will be None, so set it
> +                # appropriately
> +                try:
> +                    test['returncode'] = int(split[0].split()[1].strip())
> +                except ValueError:
> +                    test['returncode'] = None
> +            if not test.get('err'):
> +                test['err'] = split[1].strip()[7:]

I'd feel a little better if that were len("Errors:"), similar below.

> +            if not test.get('out'):
> +                test['out'] = split[2].strip()[7:]
> +
> +        # Remove the unused info key
> +        if test.get('info'):
> +            del test['info']
> +
> +        # Walk through the list of tests, if any of them contain duplicate
> +        # subtests, add those subtests to the remove set, and then write a
> +        # single test result to the update_results dictionary, which will be
> +        # merged into results
> +        #
> +        # this must be the last thing done in this loop, or there will be pain
> +        if test.get('subtest'):
> +            for sub in test['subtest'].iterkeys():
> +                # the leading / eliminates the possibility of a false
> +                # positive

Unless someone's a real jerk and has a subtest name the same as the
test name :) I'm perfectly happy to discount that possibility though.

This is all fairly elegant, but it definitely took me a bit to see why
it would work. It's all deeply tied to what sorts of things it
expects. Wouldn't hurt to throw a couple of examples in of the sort of
data it's getting as input and what it's handing out as output. (I
don't mean the full json, just like the types of test names/subtests
coming and what you want coming out.)

> +                if name.endswith('/{0}'.format(sub)):
> +                    testname = name[:-(len(sub) + 1)]  # Include /
> +                    assert testname[-1] != '/'
> +
> +                    remove.add(name)
> +                    break
> +            else:
> +                testname = name
> +
> +            if testname not in updated_results.iterkeys():

I think you can just do

testname not in updated_results

That way it'll use its internal dict set which is a lot faster than
iterating over all the keys.

You can also do

updated_results.setdefault(testname, test)

But I don't think it's clearer or faster.

> +                updated_results[testname] = test
> +
> +    for name in remove:
> +        del results.tests[name]
> +    results.tests.update(updated_results)
> +
> +    # set the results version
> +    results.results_version = 1
> +
> +    return results
> diff --git a/framework/tests/results_tests.py b/framework/tests/results_tests.py
> index b31c505..73594ac 100644
> --- a/framework/tests/results_tests.py
> +++ b/framework/tests/results_tests.py
> @@ -67,21 +67,28 @@ def test_initialize_jsonwriter():
>          assert isinstance(func, results.JSONWriter)
>
>
> -def test_load_results_folder():
> +def test_load_results_folder_as_main():
>      """ Test that load_results takes a folder with a file named main in it """
>      with utils.tempdir() as tdir:
>          with open(os.path.join(tdir, 'main'), 'w') as tfile:
>              tfile.write(json.dumps(utils.JSON_DATA))
>
> -        results_ = results.load_results(tdir)
> -        assert results_
> +        results.load_results(tdir)
> +
> +
> +def test_load_results_folder():
> +    """ Test that load_results takes a folder with a file named results.json """
> +    with utils.tempdir() as tdir:
> +        with open(os.path.join(tdir, 'results.json'), 'w') as tfile:
> +            tfile.write(json.dumps(utils.JSON_DATA))
> +
> +        results.load_results(tdir)
>
>
>  def test_load_results_file():
>      """ Test that load_results takes a file """
>      with utils.resultfile() as tfile:
> -        results_ = results.load_results(tfile.name)
> -        assert results_
> +        results.load_results(tfile.name)
>
>
>  def test_testresult_to_status():
> diff --git a/framework/tests/results_v0_tests.py b/framework/tests/results_v0_tests.py
> new file mode 100644
> index 0000000..fd8be26
> --- /dev/null
> +++ b/framework/tests/results_v0_tests.py
> @@ -0,0 +1,206 @@
> +# Copyright (c) 2014 Intel Corporation
> +
> +# Permission is hereby granted, free of charge, to any person obtaining a copy
> +# of this software and associated documentation files (the "Software"), to deal
> +# in the Software without restriction, including without limitation the rights
> +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> +# copies of the Software, and to permit persons to whom the Software is
> +# furnished to do so, subject to the following conditions:
> +
> +# The above copyright notice and this permission notice shall be included in
> +# all copies or substantial portions of the Software.
> +
> +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
> +# SOFTWARE.
> +
> +""" Module provides tests for converting version zero results to version 1 """
> +
> +import json
> +import copy
> +import nose.tools as nt
> +import framework.results as results
> +import framework.tests.utils as utils
> +
> +
> +DATA = copy.deepcopy(utils.JSON_DATA)
> +DATA['tests']['sometest']['dmesg'] = ['this', 'is', 'dmesg']
> +DATA['tests']['sometest']['info'] = \
> +    'Returncode: 1\n\nErrors:stderr\n\nOutput: stdout\n'
> +DATA['tests'].update({
> +    'group1/groupA/test/subtest 1': {
> +        'info': 'Returncode: 1\n\nErrors:stderr\n\nOutput:stdout\n',
> +        'subtest': {
> +            'subtest 1': 'pass',
> +            'subtest 2': 'pass'
> +        },
> +        'returncode': 0,
> +        'command': 'this is a command',
> +        'result': 'pass',
> +        'time': 0.1
> +    },
> +    'group1/groupA/test/subtest 2': {
> +        'info': 'Returncode: 1\n\nErrors:stderr\n\nOutput:stdout\n',
> +        'subtest': {
> +            'subtest 1': 'pass',
> +            'subtest 2': 'pass'
> +        },
> +        'returncode': 0,
> +        'command': 'this is a command',
> +        'result': 'pass',
> +        'time': 0.1
> +    },
> +    'single/test/thing': {
> +        'info': 'Returncode: 1\n\nErrors:stderr\n\nOutput:stdout\n',
> +        'subtest': {
> +            'subtest 1': 'pass',
> +        },
> +        'returncode': 0,
> +        'command': 'this is a command',
> +        'result': 'pass',
> +        'time': 0.1
> +    },
> +    'group2/groupA/test/subtest 1/depth': {
> +        'info': 'Returncode: 1\n\nErrors:stderr\n\nOutput:stdout\n',
> +        'subtest': {
> +            'subtest 1/depth': 'pass',
> +            'subtest 2/float': 'pass'
> +        },
> +        'returncode': 0,
> +        'command': 'this is a command',
> +        'result': 'pass',
> +        'time': 0.1
> +    },
> +    'group2/groupA/test/subtest 2/float': {
> +        'info': 'Returncode: 1\n\nErrors:stderr\n\nOutput:stdout\n',
> +        'subtest': {
> +            'subtest 1/depth': 'pass',
> +            'subtest 2/float': 'pass'
> +        },
> +        'returncode': 0,
> +        'command': 'this is a command',
> +        'result': 'pass',
> +        'time': 0.1
> +    },
> +    'group3/groupA/test': {
> +        'info': 'Returncode: 1\n\nErrors:stderr\n\nOutput:stdout\n',
> +        'subtest': {
> +            'subtest 1': 'pass',
> +            'subtest 2': 'pass',
> +            'subtest 3': 'pass',
> +        },
> +        'returncode': 0,
> +        'command': 'this is a command',
> +        'result': 'pass',
> +        'time': 0.1
> +    },
> +})
> +
> +with utils.with_tempfile(json.dumps(DATA)) as f:
> +    RESULT = results._update_zero_to_one(results.load_results(f))
> +
> +
> +def test_dmesg():
> +    """ version 1: dmesg is converted from a list to a string """
> +    assert RESULT.tests['sometest']['dmesg'] == 'this\nis\ndmesg'
> +
> +
> +def test_subtests_remove_duplicates():
> +    """ Version 1: Removes duplicate entries """
> +    assert 'group1/groupA/test/subtest 1' not in RESULT.tests
> +    assert 'group1/groupA/test/subtest 2' not in RESULT.tests
> +
> +
> +def test_subtests_add_test():
> +    """ Version 1: Add an entry for the actual test """
> +    assert RESULT.tests.get('group1/groupA/test')
> +
> +
> +def test_subtests_test_is_testresult():
> +    """ Version 1: The result of the new test is a TestResult Instance """
> +    assert isinstance(
> +        RESULT.tests['group1/groupA/test'],
> +        results.TestResult)
> +
> +
> +def test_info_delete():
> +    """ Version 1: Remove the info name from results """
> +    for value in RESULT.tests.itervalues():
> +        assert 'info' not in value
> +
> +
> +def test_returncode_from_info():
> +    """ Version 1: Use the returncode from info if there is no returncode """
> +    assert RESULT.tests['sometest']['returncode'] == 1
> +
> +
> +def test_returncode_no_override():
> +    """ Version 1: Do not clobber returncode with info
> +
> +    The returncode from info should not overwrite an existing returncode
> +    attribute, this test only tests that the value in info isn't used when
> +    there is a value in returncode already
> +
> +    """
> +    assert RESULT.tests['group1/groupA/test']['returncode'] != 1
> +
> +
> +def test_err_from_info():
> +    """ Version 1: add an err attribute from info """
> +    assert RESULT.tests['group1/groupA/test']['err'] == 'stderr'
> +
> +
> +def test_out_from_info():
> +    """ Version 1: add an out attribute from info """
> +    assert RESULT.tests['group1/groupA/test']['out'] == 'stdout'
> +
> +
> +def test_set_version():
> +    """ Version 1: Set the version to 1 """
> +    assert RESULT.results_version == 1
> +
> +
> +def test_dont_break_single_subtest():
> +    """ Version 1: Don't break single subtest entries
> +
> +    A test with a single subtest was written correctly before, don't break it by
> +    removing the name of the test. ex:
> +    test/foo/bar: {
> +        ...
> +        subtest: {
> +            1x1: pass
> +        }
> +    }
> +
> +    should remain test/foo/bar since bar is the name of the test not a subtest
> +
> +    """
> +    assert RESULT.tests['single/test/thing']
> +
> +
> +def test_info_split():
> +    """ Version 1: info can split into any number of elements """
> +    data = copy.copy(DATA)
> +    data['tests']['sometest']['info'] = \
> +        'Returncode: 1\n\nErrors:stderr\n\nOutput: stdout\n\nmore\n\nstuff'
> +
> +    with utils.with_tempfile(json.dumps(data)) as f:
> +        results._update_zero_to_one(results.load_results(f))
> +
> +
> +def test_subtests_with_slash():
> +    """ Version 1: Subtest names with /'s are handled correctly """
> +
> +    expected = 'group2/groupA/test/subtest 1'
> +    nt.assert_not_in(
> +        expected, RESULT.tests.iterkeys(),
> +        msg='{0} found in result, when it should not be'.format(expected))
> +
> +
> +def test_handle_fixed_subtests():
> +    """ Version 1: Correctly handle new single entry subtests correctly """
> +    assert 'group3/groupA/test' in RESULT.tests.iterkeys()
> diff --git a/framework/tests/utils.py b/framework/tests/utils.py
> index f337b1e..252ced6 100644
> --- a/framework/tests/utils.py
> +++ b/framework/tests/utils.py
> @@ -33,6 +33,7 @@ try:
>      import simplejson as json
>  except ImportError:
>      import json
> +import framework.results
>
>
>  __all__ = [
> @@ -49,6 +50,7 @@ JSON_DATA = {
>          "filter": [],
>          "exclude_filter": []
>      },
> +    "results_version": framework.results.CURRENT_JSON_VERSION,
>      "name": "fake-tests",
>      "lspci": "fake",
>      "glxinfo": "fake",
> --
> 2.0.0
>
> _______________________________________________
> Piglit mailing list
> Piglit at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/piglit


More information about the Piglit mailing list