[Libreoffice-commits] dev-tools.git: scripts/os-stats.pl

Michael Meeks michael.meeks at collabora.com
Wed Oct 14 07:29:58 PDT 2015


 scripts/os-stats.pl |  204 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 204 insertions(+)

New commits:
commit 6f1d1f6212bff5576822e94de6f8d6e695e07377
Author: Michael Meeks <michael.meeks at collabora.com>
Date:   Wed Oct 14 14:51:56 2015 +0100

    Script to crunch os statistics.

diff --git a/scripts/os-stats.pl b/scripts/os-stats.pl
new file mode 100755
index 0000000..ff889ea
--- /dev/null
+++ b/scripts/os-stats.pl
@@ -0,0 +1,204 @@
+#!/usr/bin/perl -w
+
+#
+# A script to attempt to determine OS versions from user-agent strings.
+#
+
+use strict;
+use POSIX;
+
+my %global_date_to_epoch;
+
+sub ymd_to_epoch($$$)
+{
+    my ($year, $month, $day) = @_;
+
+    my $key = "$year-$month-$day";
+
+    if ( ! defined $global_date_to_epoch{$key} ) {
+	# 1970-01-01 is Thursday, add 3 days (259200 seconds), and divide
+	my $seconds = POSIX::strftime( "%s", 0, 0, 12, $day, $month - 1, $year - 1900 ); # see the manual
+
+	# remember the ISO week
+	my $week = POSIX::strftime( "%G-%V", 0, 0, 12, $day, $month - 1, $year - 1900 ); # see the manual
+	$global_date_to_epoch{$key} = $week;
+    }
+    return $global_date_to_epoch{$key};
+}
+
+# de-mangle windows user-agents
+my %win_ver_hash = (
+    'NT 10.0' => 'Windows 10',
+    'NT 9.0' => 'Windows 9',
+    'NT 6.3' => 'Windows 8.1',
+    'NT 6.2' => 'Windows 8',
+    'NT 7.0' => 'Windows 7',
+    'NT 6.1' => 'Windows 7',
+    'NT 6.0' => 'Windows Vista',
+    'NT 5.2' => 'Windows Server 2003',
+    'NT 5.1' => 'Windows XP',
+    'NT 5.01' => 'Windows 2000 SP1',
+    'NT 5.0' => 'Windows 2000',
+    'NT 4.0' => 'Windows NT 4.0',
+    '98; Win 9x 4.90' => 'Windows ME',
+    '98' => 'Windows 98',
+    '95' => 'Windows 95',
+    'CE' => 'Windows CE',
+    );
+
+sub win_real_ver($)
+{
+    my $vin = shift;
+    my $vout = $win_ver_hash{$vin};
+    if (!defined $vout) {
+	$vout = 'Windows other';
+    }
+    return $vout;
+}
+
+my %month_to_num = (
+    'Jan' => '01',
+    'Feb' => '02',
+    'Mar' => '03',
+    'Apr' => '04',
+    'May' => '05',
+    'Jun' => '06',
+    'Jul' => '07',
+    'Aug' => '08',
+    'Sep' => '09',
+    'Oct' => '10',
+    'Nov' => '11',
+    'Dec' => '12',
+);
+
+my %totals;
+my %breakdown_by_week;
+
+sub analyze_dir($)
+{
+    my $dirname = shift;
+
+    open LOG, "( cd $dirname ; bzcat download.documentfoundation.org*access*.bz2 2>/dev/null ) |" or die "Cannot open the logs";
+    while (<LOG>) {
+	my $line = $_;
+	if ( /^([^ ]+) - - \[([^\/]+)\/([^\/]+)\/([^:]+):([0-9][0-9])[^\]]*\] "[^"]*" [^ ]+ [^ ]+ "[^"]*" "(.*)"/ ) {
+	    my ( $ip, $day, $month, $year, $hour, $useragent ) =
+		( $1, $2, $month_to_num{$3}, $4, $5, $6, );
+
+	    next if ($useragent eq '-' || $useragent eq 'setup');
+
+	    # download tools & skip bots
+	    next if ($useragent =~ m|Wget| || $useragent =~ m|chocolatey|);
+	    next if ($useragent =~ m|lftp/| || $useragent =~ m|curl/|);
+	    next if ($useragent =~ m|FPS-DAV-Client/| || $useragent =~ m|Deluge/|);
+	    next if ($useragent =~ m|FPS-GET-Client/| || $useragent =~ m|SoftonicDownloader/|);
+	    next if ($useragent =~ m|CCBot/| || $useragent =~ m|AhrefsBot/|);
+	    next if ($useragent =~ m|SputnikBot/| || $useragent =~ m|YandexBot/|);
+	    next if ($useragent =~ m|MojeekBot/| || $useragent =~ m|Webmon /|);
+	    next if ($useragent =~ m|bingbot/| || $useragent =~ m|Baiduspider/|);
+	    next if ($useragent =~ m|Yahoo! Slurp| || $useragent =~ m|portscout/|);
+	    next if ($useragent =~ m|CRAZYWEBCRAWLER | || $useragent =~ m|FDM |);
+	    next if ($useragent =~ m|YisouSpider| || $useragent =~ m|ABCdatos BotLink/|);
+	    next if ($useragent =~ m|ia_archiver | || $useragent =~ m|BTWebClient/|);
+	    next if ($useragent =~ m|portroach/| || $useragent =~ m|Java/|);
+	    next if ($useragent =~ m|Googlebot| || $useragent =~ m|escan |);
+	    next if ($useragent =~ m|Python-urllib/| || $useragent =~ m|PycURL/|);
+	    next if ($useragent =~ m|fetch | || $useragent =~ m|WWWC/|);
+	    next if ($useragent =~ m|Xovibot/| || $useragent =~ m|Dolphin |);
+	    next if ($useragent =~ m|Megaindex.ru/| || $useragent =~ m|idmarch  |);
+	    next if ($useragent =~ m|coccoc/| || $useragent =~ m|WebMon |);
+	    next if ($useragent =~ m|Download Master| || $useragent =~ m|Downloader |);
+
+	    # Misc. foo to reduce noise
+	    next if ($useragent =~ /GetRedirect/ || $useragent =~ /setup_\d/ ||
+		     $useragent =~ /GetLength/ || $useragent =~ /xbps-src-update-check/);
+
+	    my $year_week = ymd_to_epoch($year, $month, $day);
+
+	    my $key;
+	    if ($useragent =~ m/Windows \s*([^;\)]+)\s*[;\)]/) {
+#		print "good: Windows: $1\n";
+		$key = win_real_ver($1);
+	    } elsif ($useragent =~ m/Macintosh;.*Intel Mac OS X\s*([0-9_]+)/) {
+		my $short = $1;
+		$short =~ s/_[0-9]+$//;
+#		print "good: OS/X: $short\n";
+		$key = "OSX $short";
+	    } elsif ($useragent =~ m/X11; Linux/ ||
+		     $useragent =~ m/X11; Ubuntu/ ||
+		     $useragent =~ m/Linux; /) {
+		$key = "Linux";
+#		print "good: linux\n";
+	    } elsif ($useragent =~ m|[Bb]ot/|) {
+#		print "auto-bot: '$useragent'";
+	    } else {
+		$key = "other";
+#		print "odd: '$useragent'\n";
+	    }
+	    if (defined $key) {
+		$totals{$key}++;
+		$breakdown_by_week{$year_week}{$key}++;
+	    }
+	} else {
+	    if ($line =~ m|Wget/| || $line =~ m|CCBot/|) {
+#		print STDERR "bot? '$line'";
+	    } else {
+		print STDERR "bad line: '$line'\n";
+	    }
+	}
+    }
+    close LOG;
+}
+
+sub scan_dirs($);
+sub scan_dirs($)
+{
+    my $dirname = shift;
+
+    print STDERR "analyzing: $dirname\n";
+    analyze_dir($dirname);
+    opendir(my $dirh, $dirname) || die "Can't open $dirname: $!";
+    my @subdirs;
+    while (my $subdir = readdir($dirh)) {
+	next if ($subdir =~ m/^\./);
+	push @subdirs, $subdir if -d "$dirname/$subdir";
+    }
+    closedir $dirh;
+
+    for my $subdir (@subdirs) {
+	scan_dirs ("$dirname/$subdir");
+    }
+}
+
+my $toplevel = `pwd`;
+chomp($toplevel);
+scan_dirs ($toplevel);
+
+my @os_list = sort keys %totals;
+
+print "Generated on: " . qx(date --rfc-3339=seconds) . "\n";
+
+print "Totals:\n";
+for my $os (@os_list) {
+    print "$os\t".$totals{$os}."\n";
+}
+
+print "By week:\n";
+
+print "year/week\t";
+for my $os (@os_list) {
+    print "$os\t";
+}
+print "\n";
+
+for my $week (sort keys %breakdown_by_week) {
+    print "$week\t";
+    for my $os (@os_list) {
+	if (defined $breakdown_by_week{$week}{$os}) {
+	    print $breakdown_by_week{$week}{$os}."\t";
+	} else {
+	    print "0\t";
+	}
+    }
+    print "\n";
+}


More information about the Libreoffice-commits mailing list