[Libreoffice-commits] dev-tools.git: scripts/qawrangler-stats.py

Mon May 6 03:58:13 PDT 2013

scripts/qawrangler-stats.py |  147 +++++++++++++++++++++++++++++++++++---------
 1 file changed, 119 insertions(+), 28 deletions(-)

New commits:
commit 75083a0e822d25ae7c3dfd3c86b335440d0a60b6
Author: Marc Garcia <garcia.marc at gmail.com>
Date:   Mon Apr 29 12:40:37 2013 +0200

    qa wranglers script file refactored
    
    Signed-off-by: Petr Mladek <pmladek at suse.cz>

diff --git a/scripts/qawrangler-stats.py b/scripts/qawrangler-stats.py
index f73c38e..8b1c52f 100755
--- a/scripts/qawrangler-stats.py
+++ b/scripts/qawrangler-stats.py
@@ -1,44 +1,135 @@
 #!/usr/bin/env python3
+#
+# This file is part of the LibreOffice project.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+# qawrangler-stats.py
+#
+# Returns statistics of most active wranglers and reporters for a given
+# month.
+#
+# For usage information, run:
+#   qawrangler-stats.py -h
+#
 
-import sys, re
+import sys
+import re
+import datetime
 import gzip
+import argparse
+import csv
 from urllib.request import urlopen, URLError
 from io import BytesIO
-from collections import Counter
+from collections import Counter, OrderedDict
 
-month = ''
-if len(sys.argv) >= 2:
-    month = sys.argv[1]
+URL = 'http://lists.freedesktop.org/archives/libreoffice-bugs/{}.txt.gz'
+ENTITIES = OrderedDict({
+    'changers': re.compile(r'^(.+)\schanged:$', re.MULTILINE),
+    'reporters': re.compile(r'^\s*Reporter:\s(.+)$', re.MULTILINE),
+    'commentators': re.compile(r'^--- Comment #\d+ from (.+) ---$',
+        re.MULTILINE),
+})
 
-url = 'http://lists.freedesktop.org/archives/libreoffice-bugs/' + month + '.txt.gz'
-print('Downloading ' + url)
+def get_parser():
+    """Returns an argparse.ArgumentParser with the script's arguments set"""
+    parser = argparse.ArgumentParser(
+        description='LibreOffice contributor statistics')
+    parser.add_argument('-m', '--month', dest='month', type=int,
+        default=datetime.date.today().month,
+        help='month to generate statistics from (default is current month)')
+    parser.add_argument('-y', '--year', dest='year', type=int,
+        default=datetime.date.today().year,
+        help='year to generate statistics from (default is current year)')
+    parser.add_argument('-n', '--num', dest='num', type=int, default=None,
+        help='number of top contributors of each category (default is all)')
+    parser.add_argument('--csv', dest='csv', action='store_true',
+        help='output information in CSV format')
 
-try:
-    response = urlopen(url)
-    buf = BytesIO(response.read())
-    gz = gzip.GzipFile(fileobj=buf)
+    return parser
 
-    txt = gz.read().decode('us-ascii')
-    gz.close()
+def get_fname(date):
+    """Returns the `Libreoffice-bugs Archives' file name for a given @date
+    datetime object. Note that only year and month are relevant, day is
+    ignored"""
+    return '{}-{}'.format(date.year, date.strftime('%B'))
 
-    reportedby = re.compile(r'^.*Reporter:.(.*)$', re.MULTILINE)
-    reporters = re.findall(reportedby, txt)
+def get_data(url):
+    """Fetches and uncompresses the `Libreoffice-bugs Archives' file given its
+    @url. The return of the function is the content of the file as a string"""
+    try:
+        resp = urlopen(url)
+    except URLError:
+        sys.stderr.write('Error fetching {}'.format(url))
+        sys.exit(1)
+    else:
+        with gzip.GzipFile(fileobj=BytesIO(resp.read())) as f:
+            return f.read().decode('us-ascii')
 
-    wrangledby = re.compile(r'^.*<(.*)> changed:$', re.MULTILINE)
-    wranglers = re.findall(wrangledby, txt)
+def get_entity_values(data, pattern, num):
+    """Returns the @num most common matches of @pattern in the @data string
+    as (match, count) tuples. If @num is None, all matches are returned"""
+    values = re.findall(pattern, data)
+    return Counter(values).most_common(num)
 
-    topreporters = Counter(reporters).most_common(30)
-    topwranglers = Counter(wranglers).most_common(30)
+def nice_print(values_dict, num_output, date):
+    """Prints to stdout the output of the script in a human readable way.
+    @values_dict is a dict containing a key for each entity (e.g. wranglers,
+    reporters, etc), and as values, a list of tuples containing the name and
+    the number of occurrences. An example:
 
-    print('\n=== ' + month[5:] + ' ' + month[:4] + '===')
-    print('\n--- Top 30 reporters ---')
-    for reporter in topreporters:
-        print('{0:40}{1:5d}'.format(reporter[0], reporter[1]))
+    >>> {
+    >>>    'wranglers': [
+    >>>        ('Wrangler 1 <wrangler1 at his_email.com>', 30),
+    >>>            # 30 is the number of times he wrangled
+    >>>        ('Wrangler 2 <wrangler2 at his_email.com>', 15),
+    >>>    ]
+    >>> }
 
-    print('\n--- Top 30 wranglers ---')
-    for wrangler in topwranglers:
-        print('{0:40}{1:5d}'.format(wrangler[0], wrangler[1]))
+    @num_output is the number of top values requested to be displayed in each
+    category (e.g. number of top wranglers), and @date is a datetime object
+    containing the requested year and month"""
+    print('=== {} ==='.format(date.strftime('%B %Y')))
+    print()
+    for name, values in values_dict.items():
+        print('--- Top {} {} ---'.format(num_output or '', name))
+        print('\n'.join('{0:75}{1:5d}'.format(*v) for v in values))
+        print()
 
-except URLError:
-    print('Unknown file - give an archive in the form YYYY-Month as argv[1]')
+def csv_print(values_dict):
+    """Print to stdout the output of the script in CSV format. @values_dict
+    has the same format as for the `nice_print' function. The CSV file has
+    the default format for Python's csv module (comma delimited, strings
+    quoted when necessary)"""
+    writer = csv.writer(sys.stdout)
+    for entity_name, values in values_dict.items():
+        for val_name, val_count in values:
+            writer.writerow([entity_name, val_name, val_count])
+
+def main(args):
+    """Main function of the program.
+     * Fetches the file for the requested month and year
+     * For each defined entity, gathers each match of its pattern,
+        and counts the number of occurrences
+     * Prints the retrieved information to stdout in the requested format
+    """
+    date = datetime.date(args.year, args.month, 1)
+    fname = get_fname(date)
+    url = URL.format(fname)
+    data = get_data(url)
+    values = OrderedDict()
+    for name, regex in ENTITIES.items():
+        values[name] = get_entity_values(data, regex, args.num)
+
+    if args.csv:
+        csv_print(values)
+    else:
+        nice_print(values, args.num, date)
+
+if __name__ == '__main__':
+    parser = get_parser()
+    args = parser.parse_args()
+    main(args)