[Libreoffice-commits] dev-tools.git: scripts/regression-hotspots.py

Tue Jul 6 06:31:46 UTC 2021

scripts/regression-hotspots.py |   50 ++++++++++++++++++-----------------------
 1 file changed, 23 insertions(+), 27 deletions(-)

New commits:
commit 48d6a3e2c4294f2f41ce7c8fe226ccfc87f47be0
Author:     Ilmari Lauhakangas <ilmari.lauhakangas at libreoffice.org>
AuthorDate: Sun Jul 4 09:51:59 2021 +0300
Commit:     Ilmari Lauhakangas <ilmari.lauhakangas at libreoffice.org>
CommitDate: Tue Jul 6 08:31:27 2021 +0200

    regression-hotspots: replace filename regex with handy git log options
    
    Old regex had a bug that made it skip some files.
    Switch from sh to GitPython.
    Introduce an excluding regex to leave out uninteresting files.
    Reorder the output sections and add wikitext headings.
    
    Change-Id: I1103d8d34d2a146d64f8aae57ca921716e88987d
    Reviewed-on: https://gerrit.libreoffice.org/c/dev-tools/+/118367
    Reviewed-by: Ilmari Lauhakangas <ilmari.lauhakangas at libreoffice.org>
    Tested-by: Ilmari Lauhakangas <ilmari.lauhakangas at libreoffice.org>

diff --git a/scripts/regression-hotspots.py b/scripts/regression-hotspots.py
index c2b5600..cec2781 100755
--- a/scripts/regression-hotspots.py
+++ b/scripts/regression-hotspots.py
@@ -6,15 +6,16 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at https://mozilla.org/MPL/2.0/.
 #
+# Uses https://github.com/gitpython-developers/GitPython
+# Results published in https://wiki.documentfoundation.org/Development/RegressionHotspots
 
 import sys
 import re
-import sh
+import git
 from urllib.request import urlopen, URLError
 from io import BytesIO
-
 def get_fixed_regression_bugs():
-    url = 'https://bugs.libreoffice.org/buglist.cgi?bug_status=UNCONFIRMED&bug_status=NEW&bug_status=ASSIGNED&bug_status=REOPENED&bug_status=RESOLVED&bug_status=VERIFIED&bug_status=CLOSED&bug_status=NEEDINFO&bug_status=PLEASETEST&columnlist=&keywords=regression%2C%20&keywords_type=allwords&limit=0&list_id=354018&product=LibreOffice&query_format=advanced&resolution=FIXED&ctype=csv&human=0'
+    url = 'https://bugs.documentfoundation.org/buglist.cgi?bug_status=UNCONFIRMED&bug_status=NEW&bug_status=ASSIGNED&bug_status=REOPENED&bug_status=RESOLVED&bug_status=VERIFIED&bug_status=CLOSED&bug_status=NEEDINFO&bug_status=PLEASETEST&columnlist=&keywords=regression%2C%20&keywords_type=allwords&limit=0&list_id=354018&product=LibreOffice&query_format=advanced&resolution=FIXED&ctype=csv&human=0'
     try:
         resp = urlopen(url)
     except URLError:
@@ -24,7 +25,6 @@ def get_fixed_regression_bugs():
     for line in [raw.decode('utf-8').strip('\n') for raw in BytesIO(resp.read())][1:]:
         bug_ids.append(int(line))
     return bug_ids
-
 def get_dir_counts(file_counts, level):
     dir_counts = {}
     for (filename, count) in file_counts.items():
@@ -36,37 +36,33 @@ def get_dir_counts(file_counts, level):
             else:
                 dir_counts[dirpart]=count
     return dir_counts
-
 def print_counts(counts):
     printorder = reversed(sorted((count, name) for (name, count) in counts.items()))
     for count in printorder:
         print('%5d %s' % (count[0], count[1]))
-
 if __name__ == '__main__':
     file_counts = {}
-    statregex = re.compile('^ ([^ ]+) \|')
+    excluderegex = re.compile(r'qa/|icon-themes/|extras/source/gallery/|extras/source/palettes/|extras/source/templates/|extras/source/truetype/|helpcontent2|dictionaries|translations|download\.lst|\.png|\.patch')
     fixed_regression_ids = get_fixed_regression_bugs()
     sys.stderr.write('found %d fixed regressions: %s\n' % (len(fixed_regression_ids), fixed_regression_ids))
     for bug_id in fixed_regression_ids:
         sys.stderr.write('working on bug %d\n' % bug_id)
-        # FIXME: use --numstat instead, which does not abbreviate filenames
-        logstat = sh.git('--no-pager', 'log', '--grep', '[fdo|tdf]#%d' % bug_id, '--stat')
-        for line in logstat:
-            match = statregex.search(str(line))
-            if match and match.group(1):
-                filename = match.group(1)
-                sys.stderr.write('regression fix touched file: %s\n' % filename)
-                if filename in file_counts:
-                    file_counts[filename]+=1
-                else:
-                    file_counts[filename]=1
-    print('top level dirs:')
-    print_counts(get_dir_counts(file_counts, 1))
-    print('\nsecond level dirs:')
-    print_counts(get_dir_counts(file_counts, 2))
-    print('\nthird level dirs:')
-    print_counts(get_dir_counts(file_counts, 3))
-    print('\nfourth level dirs:')
-    print_counts(get_dir_counts(file_counts, 4))
-    print('\nfiles:')
+        lognames = git.Git('.').execute(['git', 'log', '--grep=[fdo|tdf]#'+str(bug_id), '--pretty=tformat:', '--name-only'])
+        if lognames:
+            for filename in lognames.split('\n'):
+                if not excluderegex.search(filename):
+                    sys.stderr.write('regression fix touched file: %s\n' % filename)
+                    if filename in file_counts:
+                        file_counts[filename]+=1
+                    else:
+                        file_counts[filename]=1
+    print('=== files ===\n')
     print_counts(file_counts)
+    print('\n=== fourth level dirs ===\n')
+    print_counts(get_dir_counts(file_counts, 4))
+    print('\n=== third level dirs ===\n')
+    print_counts(get_dir_counts(file_counts, 3))
+    print('\n=== second level dirs ===\n')
+    print_counts(get_dir_counts(file_counts, 2))
+    print('\n=== top level dirs ===\n')
+    print_counts(get_dir_counts(file_counts, 1))