[igt-dev] [PATCH i-g-t] scripts/xls_to_doc.py: add a script to partially import data from a spreadsheet

Tue Jun 6 16:16:16 UTC 2023

Hi Mauro,

On 2023-06-06 at 11:14:09 +0200, Mauro Carvalho Chehab wrote:
> From: Mauro Carvalho Chehab <mchehab at kernel.org>
> 
> Sometimes, it is useful to import data from spreadsheets into
> the documentation. This is error-prone, especially when using
> wildcards. The logic here does its best to update the data from a
> spreadsheet, but manual review is needed.
> 
> Signed-off-by: Mauro Carvalho Chehab <mchehab at kernel.org>
> ---
>  scripts/test_list.py  |  35 +++--
>  scripts/xls_to_doc.py | 347 ++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 368 insertions(+), 14 deletions(-)
>  create mode 100755 scripts/xls_to_doc.py
> 
> diff --git a/scripts/test_list.py b/scripts/test_list.py
> index 2810c23586cb..f676024c9571 100755
> --- a/scripts/test_list.py
> +++ b/scripts/test_list.py
> @@ -412,7 +412,7 @@ class TestList:
>          # None of the filtering rules were applied
>          return False
>  
> -    def expand_subtest(self, fname, test_name, test, allow_inherit):
> +    def expand_subtest(self, fname, test_name, test, allow_inherit, with_lines = False, with_subtest_nr = False):
>  
>          """Expand subtest wildcards providing an array with subtests"""
>  
> @@ -426,6 +426,7 @@ class TestList:
>                  continue
>  
>              num_vars = summary.count('%')
> +            file_ln = self.doc[test]["subtest_line"][subtest]
>  
>              # Handle trivial case: no wildcards
>              if num_vars == 0:
> @@ -434,9 +435,7 @@ class TestList:
>                  subtest_dict["Summary"] = summary
>  
>                  for k in sorted(self.doc[test]["subtest"][subtest].keys()):
> -                    if k == 'Summary':
> -                        continue
> -                    if k == 'arg':
> +                    if k in [ 'Summary', 'arg', 'subtest_line' ]:
>                          continue
>  
>                      if not allow_inherit:
> @@ -445,6 +444,12 @@ class TestList:
>  
>                      subtest_dict[k] = self.doc[test]["subtest"][subtest][k]
>  
> +                if with_lines:
> +                    subtest_dict["line"] = file_ln
> +
> +                if with_subtest_nr:
> +                    subtest_dict["subtest_nr"] = subtest
> +
>                  subtest_array.append(subtest_dict)
>  
>                  continue
> @@ -499,9 +504,7 @@ class TestList:
>                  subtest_dict["Summary"] = arg_summary
>  
>                  for field in sorted(self.doc[test]["subtest"][subtest].keys()):
> -                    if field == 'Summary':
> -                        continue
> -                    if field == 'arg':
> +                    if field in [ 'Summary', 'arg', 'subtest_line' ]:
>                          continue
>  
>                      sub_field = self.doc[test]["subtest"][subtest][field]
> @@ -514,6 +517,12 @@ class TestList:
>  
>                      subtest_dict[field] = sub_field
>  
> +                if with_lines:
> +                    subtest_dict["line"] = file_ln
> +
> +                if with_subtest_nr:
> +                    subtest_dict["subtest_nr"] = subtest
> +
>                  subtest_array.append(subtest_dict)
>  
>                  # Increment variable inside the array
> @@ -567,9 +576,7 @@ class TestList:
>  
>                  dic[summary] = {}
>                  for field in sorted(subtest.keys()):
> -                    if field == 'Summary':
> -                        continue
> -                    if field == 'arg':
> +                    if field in [ 'Summary', 'arg', 'subtest_line' ]:
>                          continue
>                      dic[summary][field] = subtest[field]
>  
> @@ -635,9 +642,7 @@ class TestList:
>                  print("")
>  
>                  for field in sorted(subtest.keys()):
> -                    if field == 'Summary':
> -                        continue
> -                    if field == 'arg':
> +                    if field in [ 'Summary', 'arg', 'subtest_line' ]:
>                          continue
>  
>                      print(f":{field}:", subtest[field])
> @@ -979,7 +984,7 @@ class TestList:
>              current_test = ''
>              subtest_number = 0
>  
> -            for file_ln,file_line in enumerate(handle):
> +            for file_ln, file_line in enumerate(handle):
>                  file_line = file_line.rstrip()
>  
>                  if re.match(r'^\s*\*$', file_line):
> @@ -1021,6 +1026,7 @@ class TestList:
>                          self.doc[current_test]["Summary"] = match.group(1)
>                          self.doc[current_test]["File"] = fname
>                          self.doc[current_test]["subtest"] = {}
> +                        self.doc[current_test]["subtest_line"] = {}
>  
>                          if implemented_class:
>                              self.doc[current_test]["Class"] = implemented_class
> @@ -1057,6 +1063,7 @@ class TestList:
>  
>                      self.doc[current_test]["subtest"][current_subtest]["Summary"] = match.group(1)
>                      self.doc[current_test]["subtest"][current_subtest]["Description"] = ''
> +                    self.doc[current_test]["subtest_line"][current_subtest] = file_ln
>  
>                      if not arg_ref:
>                          arg_ref = arg_number
> diff --git a/scripts/xls_to_doc.py b/scripts/xls_to_doc.py
> new file mode 100755
> index 000000000000..ed56a02bf5ca
> --- /dev/null
> +++ b/scripts/xls_to_doc.py
> @@ -0,0 +1,347 @@
> +#!/usr/bin/env python3
> +# pylint: disable=C0301,C0302,C0103,C0116,C0114,R0912,R0914,R0915,R1702,C0115,R0913
> +# SPDX-License-Identifier: (GPL-2.0 OR MIT)
> +
> +## Copyright (C) 2023    Intel Corporation                 ##
> +## Author: Mauro Carvalho Chehab <mchehab at kernel.org>      ##
> +
> +import argparse
> +import json
> +import os
> +import re
> +import sys
> +
> +from openpyxl import load_workbook
> +
> +from test_list import TestList
> +
> +EPILOG=""
> +
> +#
> +# FillTests class definition
> +#
> +class FillTests(TestList):
> +    def __init__(self, config_path):
> +        self.tests = {}
> +        self.spreadsheet_data = {}
> +
> +        TestList.__init__(self, config_path)
> +
> +        self.testname_regex = re.compile(r'^\s*(igt@[^\n\@]+)\@?(\S*)\s*')
> +        self.key_has_wildcard = re.compile(r'\%?arg\[(\d+)\]')
> +        self.field_re = re.compile(r"(" + '|'.join(self.field_list.keys()) + r'):\s*(.*)', re.I)
> +
> +        for test in self.doc:                   # pylint: disable=C0206
> +            fname = self.doc[test]["File"]
> +
> +            name = re.sub(r'.*/', '', fname)
> +            name = re.sub(r'\.[\w+]$', '', name)
> +            name = "igt@" + name
> +
> +            subtest_array = self.expand_subtest(fname, name, test, True, True, True)
> +            for subtest_dict in subtest_array:
> +                name = subtest_dict["Summary"]
> +                del subtest_dict["Summary"]
> +
> +                match = self.testname_regex.match(name)
> +                if not match:
> +                    sys.exit(f"Error: can't parse {name}")
> +
> +                testname = match.group(1)
> +                if match.group(2):
> +                    subtest = match.group(2)
> +                else:
> +                    subtest = ''
> +
> +                if testname not in self.tests:
> +                    self.tests[testname] = {}
> +                    self.tests[testname]["subtests"] = {}
> +
> +                    self.tests[testname]["Test"] = test
> +                    self.tests[testname]["File"] = fname
> +
> +                self.tests[testname]["subtests"][subtest] = subtest_dict
> +
> +    def add_field(self, dic, field, value):
> +        if field in dic and dic[field] != '':
> +            fields = sorted(dic[field].split(", "))
> +            fields.append(value)
> +            value = ", ".join(sorted(fields))
> +
> +        dic[field] = value
> +
> +    def read_testlist(self, filename):
> +        if re.match("^xe", filename):
> +            return
> +        name = re.sub(r"(.*/)?(.*)\.testlist$", r"\2", filename)
> +        if name == "fast-feedback":
> +            name = "BAT"
> +        elif name == "eu-debugger-fast-feedback":
> +            name = "BAT eudebugger"
> +        elif name == "fast-feedback-extras-for-simulator":
> +            name = "BAT simulator"
> +        elif name == "fast-feedback_suspend":
> +            name = "suspend"
> +
> +        name = re.sub(r"eu-debugger", "eudebugger ", name)
> +        name = re.sub(r"\bbat\b", "BAT", name)
> +        name = re.sub(r"[._\-]", " ", name)
> +
> +        with open(filename, 'r', newline = '', encoding = 'utf8') as fp:
> +            for line in fp:
> +                match = re.match(r"^\s*(igt@[^\s\@]+)(\S*)\#?", line)
> +                if match:
> +                    testname = match.group(1)
> +                    subtest = match.group(2)
> +                    if testname not in self.tests:
> +                        self.tests[testname] = {}
> +                        self.tests[testname]["properties"] ={}
> +                        self.tests[testname]["subtests"] = {}
> +                    if subtest not in self.tests[testname]["subtests"]:
> +                        self.tests[testname]["subtests"][subtest] = {}
> +                    self.add_field(self.tests[testname]["subtests"][subtest], "Run type", name)
> +
> +    def get_testlists(self, path):
> +        # Create a dictionary with filenames
> +
> +        regex = re.compile(r".*\.testlist")
> +
> +        for root,d_names,f_names in os.walk(path):          # pylint: disable=W0612
> +            for filename in f_names:
> +                if regex.match(filename):
> +                    self.read_testlist(os.path.join(root, filename))
> +
> +    def process_spreadsheet_sheet(self, sheet):
> +
> +        column_list=[]
> +        for cell in sheet[1]:
> +            column_list.append(cell.value)
> +
> +        for row in range(2, sheet.max_row):
> +            if sheet[row][0].value is None:
> +                print(f"Ignoring sheet after A{row} row, as test name is empty")
> +                return
> +            if not isinstance(sheet[row][0].value, str):
> +                print(f"Ignoring A{row} row on {sheet.title}: test name is not a string: {sheet[row][0].value}")
> +                continue
> +            test_name = sheet[row][0].value.strip()
> +            if not re.match(r'^igt\@', test_name):
> +                print(f"Ignoring A{row} row on {sheet.title}: not a valid test name: {test_name}")
> +                continue
> +
> +            if test_name not in self.spreadsheet_data:
> +                self.spreadsheet_data[test_name] = {}
> +
> +            i = 1
> +            for col in range(2, sheet.max_column + 1):
> +                val = sheet.cell(row=row, column=col).value
> +                if val:
> +                    if isinstance(val, str):
> +                        val = val.strip()
> +
> +                    self.spreadsheet_data[test_name][column_list[i]] = val
> +
> +                i += 1
> +
> +    def read_spreadsheet_file(self, fname, sheets):
> +
> +        # Iterate the loop to read the cell values
> +        wb = load_workbook(filename = fname)
> +
> +        # Handle first "normal" sheets
> +        for sheet in wb:
> +            if sheets and sheet.title not in sheets:
> +                continue
> +
> +            self.process_spreadsheet_sheet(sheet)
> +
> +        return dict(sorted(self.spreadsheet_data.items()))
> +
> +    def change_value(self, content, subtest, line, field, value):
> +
> +        current_field = None
> +        i = line
> +        while 1:
> +            i += 1
> +            if i >= len(content):
> +                break
> +
> +            file_line = content[i]
> +
> +            if re.match(r'^\s*\*\/\s*$', file_line):
> +                break
> +
> +            file_line = re.sub(r'^\s*\* ?', '', file_line)
> +
> +            match = re.match(r'^SUBTESTS?:\s*(.*)', file_line)
> +            if match and match.group(1) != subtest:
> +                break
> +
> +            match = re.match(r'^TEST:\s*(.*)', file_line)
> +            if match and match.group(1) != subtest:
> +                break
> +
> +            match = re.match(r'arg\[(\d+)\]:\s*(.*)', file_line)
> +            if match:
> +                break
> +
> +            match = re.match(r'\@(\S+):\s*(.*)', file_line)
> +            if match:
> +                break
> +
> +            match = re.match(r'arg\[(\d+)\]\.values:\s*(.*)', file_line)
> +            if match:
> +                break
> +
> +            match = re.match(self.field_re, file_line)
> +            if match:
> +                current_field = self.field_list[match.group(1).lower()]
> +                if current_field != field:
> +                    continue
> +                content[i] = ""
> +
> +            # Handle continuation lines
> +            if current_field:
> +                match = re.match(r'\s+(.*)', file_line)
> +                if match:
> +                    if current_field != field:
> +                        continue
> +
> +                    content[i] = ""
> +
> +        content.insert(i, f' * {field}: {value}\n')
> +
> +    def parse_spreadsheet(self, fname, sheets = None):
> +        if not os.path.isfile(fname):
> +            print(f'Warning: {fname} not found. Skipping spreadsheet parser')
> +            return
> +
> +        data = self.read_spreadsheet_file(fname, sheets)
> +
> +        for test, row in data.items():
> +            match = self.testname_regex.match(test)
> +            if not match:
> +                sys.exit(f"Error: can't parse {test}")
> +
> +            testname = match.group(1)
> +            if match.group(2):
> +                subtest = match.group(2)
> +            else:
> +                subtest = ''
> +
> +            if testname not in self.tests:
> +                print(f"Ignoring {test}, as test is not documented.")
> +                continue
> +
> +            if subtest not in self.tests[testname]["subtests"]:
> +                self.tests[testname]["subtests"][subtest] = {}
> +
> +            for key, value in row.items():
> +                self.tests[testname]["subtests"][subtest][key] = value
> +
> +    def update_test_file(self, testname):
> +        try:
> +#            print(f"Updating {testname}")
> +
> +            sourcename = self.tests[testname]["File"]
> +            with open(sourcename, 'r', encoding='utf8') as in_fp:
> +                content = in_fp.read().splitlines(True)
> +        except EnvironmentError:
> +            sys.exit(f'Failed to read {sourcename}')
> +
> +        try:
> +
> +            test_nr = self.tests[testname]["Test"]
> +
> +            for subtest, subtest_content in sorted(self.tests[testname]["subtests"].items()):
> +                if "line" not in subtest_content:
> +                    print(f"Warning: didn't find where {subtest} is documented.")
> +                    continue
> +
> +                line = subtest_content['line']
> +                subtest_nr = subtest_content['subtest_nr']
> +
> +                if subtest_nr not in self.doc[test_nr]["subtest"]:
> +                    print(f"Error: missing subtest {subtest_nr} at {self.doc[test_nr]['subtest']}")
> +
> +                doc_content = self.doc[test_nr]["subtest"][subtest_nr]
> +
> +                # Handling wildcards is not easy. Let's just skip those
> +                for field, value in sorted(subtest_content.items()):
> +                    if field in [ 'line', 'subtest_nr' ]:
> +                        continue
> +                    doc_value = doc_content.get(field)
> +                    if doc_value:
> +                        if self.key_has_wildcard.search(doc_value):
> +                            print(f"Warning: {subtest} field {field} has wildcards.")
> +                            continue
> +                        if doc_value == value:
> +                            print(f"{testname}@{subtest} field {field}: Value unchanged. Ignoring it")
> +                            continue
> +
> +                    print(f"Update {testname}@{subtest} field {field} on line {line}:")
> +                    print(f"  Change from {doc_value} to {value}")
> +
> +                    # Just in case, handle continuation lines
> +                    value = re.sub(r"\n", "\n *   ", value)
> +
> +                    self.change_value(content, subtest, line, field, value)
> +
> +                    # Update line numbers after insert
> +                    skip = True
> +                    for sub, sub_content in sorted(self.tests[testname]["subtests"].items()):
> +                        if sub == subtest:
> +                            skip = False
> +                            continue
> +                        if skip:
> +                            continue
> +                        sub_line = sub_content['line']
> +                        if sub_line >= line:
> +                            sub_content['line'] += 1
> +
> +        except EnvironmentError as err:
> +            sys.exit(f'Error: {err}')
> +
> +        # Write changes
> +        try:
> +            print(f"Writing to {sourcename}")
> +            with open(sourcename, 'w', encoding='utf8') as out_fp:
> +                out_fp.write("".join(content))
> +        except EnvironmentError:
> +            print(f'Failed to write to {sourcename}')
> +
> +    def update_test_files(self):
> +
> +        """ Populate documentation """
> +
> +        for testname in self.tests:
> +            self.update_test_file(testname)
> +
> +######
> +# Main
> +######
> +
> +parser = argparse.ArgumentParser(description=__doc__,
> +                                    formatter_class = argparse.RawDescriptionHelpFormatter,
> +                                    epilog = EPILOG)
> +parser.add_argument("--config", required = True,
> +                    help="JSON file describing the test plan template")
> +parser.add_argument("--xls", required = True,
> +                    help="Input XLS file.")
> +parser.add_argument("--sheets", nargs = "*",
> +                    help="Input only some specific sheets from the XLS file.")
> +
> +parse_args = parser.parse_args()
> +
> +fill_test = FillTests(parse_args.config)
> +
> +fill_test.parse_spreadsheet(parse_args.xls, parse_args.sheets)
> +
> +## DEBUG: remove it later on
> +with open("fill_test.json", "w", encoding='utf8') as write_file:
> +    json.dump(fill_test.tests, write_file, indent = 4)
> +with open("doc.json", "w", encoding='utf8') as write_file:
> +    json.dump(fill_test.doc, write_file, indent = 4)
> +
> +
> +fill_test.update_test_files()

Please also add a few usage examples.
With that,

Acked-by: Kamil Konieczny <kamil.konieczny at linux.intel.com>

> -- 
> 2.40.1
>