[Libreoffice-commits] dev-tools.git: scripts/qawrangler-stats.py
Marc Garcia
garcia.marc at gmail.com
Mon May 6 03:58:13 PDT 2013
scripts/qawrangler-stats.py | 147 +++++++++++++++++++++++++++++++++++---------
1 file changed, 119 insertions(+), 28 deletions(-)
New commits:
commit 75083a0e822d25ae7c3dfd3c86b335440d0a60b6
Author: Marc Garcia <garcia.marc at gmail.com>
Date: Mon Apr 29 12:40:37 2013 +0200
qa wranglers script file refactored
Signed-off-by: Petr Mladek <pmladek at suse.cz>
diff --git a/scripts/qawrangler-stats.py b/scripts/qawrangler-stats.py
index f73c38e..8b1c52f 100755
--- a/scripts/qawrangler-stats.py
+++ b/scripts/qawrangler-stats.py
@@ -1,44 +1,135 @@
#!/usr/bin/env python3
+#
+# This file is part of the LibreOffice project.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+# qawrangler-stats.py
+#
+# Returns statistics of most active wranglers and reporters for a given
+# month.
+#
+# For usage information, run:
+# qawrangler-stats.py -h
+#
-import sys, re
+import sys
+import re
+import datetime
import gzip
+import argparse
+import csv
from urllib.request import urlopen, URLError
from io import BytesIO
-from collections import Counter
+from collections import Counter, OrderedDict
-month = ''
-if len(sys.argv) >= 2:
- month = sys.argv[1]
+URL = 'http://lists.freedesktop.org/archives/libreoffice-bugs/{}.txt.gz'
+ENTITIES = OrderedDict({
+ 'changers': re.compile(r'^(.+)\schanged:$', re.MULTILINE),
+ 'reporters': re.compile(r'^\s*Reporter:\s(.+)$', re.MULTILINE),
+ 'commentators': re.compile(r'^--- Comment #\d+ from (.+) ---$',
+ re.MULTILINE),
+})
-url = 'http://lists.freedesktop.org/archives/libreoffice-bugs/' + month + '.txt.gz'
-print('Downloading ' + url)
+def get_parser():
+ """Returns an argparse instance, setting the arguments for the script"""
+ parser = argparse.ArgumentParser(
+ description='LibreOffice contributor statistics')
+ parser.add_argument('-m', '--month', dest='month', type=int,
+ default=datetime.date.today().month,
+ help='month to generate statistics from (default is current month)')
+ parser.add_argument('-y', '--year', dest='year', type=int,
+ default=datetime.date.today().year,
+ help='year to generate statistics from (default is current year)')
+ parser.add_argument('-n', '--num', dest='num', type=int, default=None,
+ help='number of top contributors of each category (default is all)')
+ parser.add_argument('--csv', dest='csv', action='store_true',
+ help='output information in CSV format')
-try:
- response = urlopen(url)
- buf = BytesIO(response.read())
- gz = gzip.GzipFile(fileobj=buf)
+ return parser
- txt = gz.read().decode('us-ascii')
- gz.close()
+def get_fname(date):
+ """Returns the `Libreoffice-bugs Archives' file name for a given @date
+ datetime object. Note that only year and month are relevant, day is
+ ignored"""
+ return '{}-{}'.format(date.year, date.strftime('%B'))
- reportedby = re.compile(r'^.*Reporter:.(.*)$', re.MULTILINE)
- reporters = re.findall(reportedby, txt)
+def get_data(url):
+ """Fetches and uncompresses the `Libreoffice-bugs Archives' file given its
+ @url. The return of the function is the content of the file as a string"""
+ try:
+ resp = urlopen(url)
+ except URLError:
+ sys.stderr.write('Error fetching {}'.format(url))
+ sys.exit(1)
+ else:
+ with gzip.GzipFile(fileobj=BytesIO(resp.read())) as f:
+ return f.read().decode('us-ascii')
- wrangledby = re.compile(r'^.*<(.*)> changed:$', re.MULTILINE)
- wranglers = re.findall(wrangledby, txt)
+def get_entity_values(data, pattern, num):
+ """Returns the @num most common matches of @pattern in the @data string,
+ with their counts. If @num is None, all matches are returned"""
+ values = re.findall(pattern, data)
+ return Counter(values).most_common(num)
- topreporters = Counter(reporters).most_common(30)
- topwranglers = Counter(wranglers).most_common(30)
+def nice_print(values_dict, num_output, date):
+ """Prints to stdout the output of the script in a human readable way.
+ @values_dict is a dict containing a key for each entity (e.g. wranglers,
+ reporters, etc), and as values, a list of tuples containing the name and
+ the number of occurrences. An example:
- print('\n=== ' + month[5:] + ' ' + month[:4] + '===')
- print('\n--- Top 30 reporters ---')
- for reporter in topreporters:
- print('{0:40}{1:5d}'.format(reporter[0], reporter[1]))
+ >>> {
+ >>> 'wranglers': [
+ >>> ('Wrangler 1 <wrangler1 at his_email.com>', 30),
+ >>> # 30 is the number of times he wrangled
+ >>> ('Wrangler 2 <wrangler2 at his_email.com>', 15),
+ >>> ]
+ >>> }
- print('\n--- Top 30 wranglers ---')
- for wrangler in topwranglers:
- print('{0:40}{1:5d}'.format(wrangler[0], wrangler[1]))
+ @num_output is the number of top values of each category requested
+ to be displayed (e.g. number of top wranglers), and @date is a datetime
+ object containing the requested year and month"""
+ print('=== {} ==='.format(date.strftime('%B %Y')))
+ print()
+ for name, values in values_dict.items():
+ print('--- Top {} {} ---'.format(num_output or '', name))
+ print('\n'.join('{0:75}{1:5d}'.format(*v) for v in values))
+ print()
-except URLError:
- print('Unknown file - give an archive in the form YYYY-Month as argv[1]')
+def csv_print(values_dict):
+ """Print to stdout the output of the script in CSV format. @values_dict
+ has the same format as for the `nice_print' function. The CSV file has
+ the default format for Python's csv module (comma delimited, strings
+ quoted when necessary)"""
+ writer = csv.writer(sys.stdout)
+ for entity_name, values in values_dict.items():
+ for val_name, val_count in values:
+ writer.writerow([entity_name, val_name, val_count])
+
+def main(args):
+ """Main function of the program.
+ * Fetches the file for the requested month and year
+ * For each defined entity, gathers each match of its pattern,
+ and counts the number of occurrences
+ * Prints the retrieved information to stdout in the requested format
+ """
+ date = datetime.date(args.year, args.month, 1)
+ fname = get_fname(date)
+ url = URL.format(fname)
+ data = get_data(url)
+ values = OrderedDict()
+ for name, regex in ENTITIES.items():
+ values[name] = get_entity_values(data, regex, args.num)
+
+ if args.csv:
+ csv_print(values)
+ else:
+ nice_print(values, args.num, date)
+
+if __name__ == '__main__':
+ parser = get_parser()
+ args = parser.parse_args()
+ main(args)
More information about the Libreoffice-commits
mailing list