[Libreoffice-commits] dev-tools.git: 2 commits - scripts/download-stats.pl

Wed Apr 3 08:43:57 PDT 2013

scripts/download-stats.pl |  905 +++++++++++++++++++++++++---------------------
 1 file changed, 501 insertions(+), 404 deletions(-)

New commits:
commit c78dcc2843b733c7ff237dea178d7599f9d1f730
Author: Michael Meeks <michael.meeks at suse.com>
Date:   Wed Apr 3 16:42:47 2013 +0100

    add apache log analysis tool to generate download stats.
    
    Kendy wrote at least the hardest half.

diff --git a/scripts/download-stats.pl b/scripts/download-stats.pl
new file mode 100755
index 0000000..3e128e0
--- /dev/null
+++ b/scripts/download-stats.pl
@@ -0,0 +1,801 @@
+#!/usr/bin/perl -w
+
+use strict;
+use threads ('yield',
+	     'stack_size' => 64*4096,
+	     'exit' => 'threads_only',
+	     'stringify');
+use POSIX qw(strftime);
+
+my $verbose = 0;         # -v / --verbose: extra diagnostics on STDERR
+my $rsync_first = 0;     # -u / --update: rsync the logs before parsing them
+my $cpus_to_use = 32;    # level of parallelism
+my $bzcat_grouping = 10; # files to pass to bzcat / zcat at once
+my $path_to_log_tree;    # root of the log tree, defaults to `pwd`/downloads
+my $threaded = 1;        # parse the logs in worker threads
+
+# month abbreviation as found in the access log => two digit month number
+my %month_to_num = (
+    'Jan' => '01',
+    'Feb' => '02',
+    'Mar' => '03',
+    'Apr' => '04',
+    'May' => '05',
+    'Jun' => '06',
+    'Jul' => '07',
+    'Aug' => '08',
+    'Sep' => '09',
+    'Oct' => '10',
+    'Nov' => '11',
+    'Dec' => '12',
+);
+
+# Aggregated results, filled in by merge_results().
+my %products;            # product name => 1 (set of products seen)
+my %allversions;         # version string => 1 (set of versions seen)
+my %all_files_list;      # canonicalised file path => download count
+my %date_product_count;  # pretty date => { product => download count }
+my %date_version_count;  # pretty date => { version => download count }
+# Start at zero so that a run which counts no download at all still writes a
+# valid office:value and does not trip over an uninitialised value.
+my $total_downloads = 0;
+
+# Convert a zero-based column index into a spreadsheet column name:
+# 0 => 'A', 25 => 'Z', 26 => 'AA', 27 => 'AB', ... 701 => 'ZZ', 702 => 'AAA'.
+# The formulae written below address cells by column letter (there is no
+# R1C1 style referencing), hence this helper.
+# Dies if the index is negative.
+sub coltoref($)
+{
+    my $col = shift;
+    die ("invalid column index '$col'") if ($col < 0);
+
+    # bijective base 26: peel off the last letter, then carry on with the
+    # remaining prefix (the "- 1" is what makes 'AA' follow 'Z')
+    my $ref = '';
+    while ($col >= 0) {
+	$ref = chr (ord('A') + $col % 26) . $ref;
+	$col = int ($col / 26) - 1;
+    }
+    return $ref;
+}
+
+# Print one string-typed table cell that shows $date in cell style $style.
+sub print_date_cell($$)
+{
+    my ($cell_style, $label) = @_;
+    # the label is a year + ISO week number rather than a real date, which
+    # is why the cell is written as a string.
+    my $cell = << "EOF";
+            <table:table-cell table:style-name="$cell_style" office:value-type="string">
+                  <text:p>$label</text:p>
+	    </table:table-cell>
+EOF
+    print $cell;
+}
+
+# Recursively collect the apache access logs below $path.
+#
+# A plain file is returned if its path looks like a documentfoundation.org
+# access log, otherwise nothing is returned for it. Anything that is not a
+# plain file is opened as a directory and every entry whose name does not
+# start with '.' is searched in turn.
+#
+# Returns the list of matching paths; dies if a directory cannot be opened.
+sub find_logs($);
+sub find_logs($)
+{
+    my $path = shift;
+    my $dirh;
+    my @logfiles;
+
+    if (-f $path ) {
+	if ($path =~ m/documentfoundation\.org.*access_log/) {
+	    if ($verbose) {
+		print STDERR "hit: $path\n";
+	    }
+	    return $path;
+	} else {
+	    return;
+	}
+    }
+
+    opendir ($dirh, $path) || die "can't open '$path': $!";
+    # test for definedness explicitly, so that an entry called "0" can
+    # never be mistaken for the end of the directory
+    while (defined (my $name = readdir ($dirh))) {
+	next if ($name =~ m/^\./);
+	push @logfiles, find_logs("$path/$name");
+    }
+    # a directory handle has to be released with closedir(); close() fails on it
+    closedir ($dirh);
+
+    return @logfiles;
+}
+
+# Patterns for request paths that are not counted as downloads.
+my @uninteresting_patterns = (
+    # not interesting path starts and well known non-download files
+    qr/^$/,
+    qr/^{/,
+    qr/^%/,
+    qr/^debian-repo\/testing\//,
+    qr/^\/libreoffice\/old\//,
+    qr/^\/libreoffice\/src\//,
+    qr/^\/robots\.txt$/,
+    qr/\/index\.php$/,
+    qr/\/a\.sh$/,
+    qr/^\/TIMESTAMP/,
+
+    # ignore source
+    qr|/src/|,
+
+    # ignore android remote
+    qr/ImpressRemote.apk$/,
+
+    # anywhere
+    qr/\/customer_testimonials.php/,
+
+    # anything that is missing an extension (directory names, metafiles) and slash
+    qr/\/[^.\/]+$/,
+    qr/^[^\/]+$/,
+
+    # not interesting extensions
+    qr/\/$/,
+    qr/\?C=[MNS];O=[AD]$/,
+    qr/\.asc$/,
+    qr/\.btih$/,
+    qr/\.css$/,
+    qr/\/favicon\.ico$/,
+    qr/\.gif$/,
+    qr/\.gpg$/,
+    qr/\.html$/,
+    qr/\.info\.php$/,
+    qr/\.log$/,
+    qr/\.magnet$/,
+    qr/\.md5$/,
+    qr/\.meta4$/,
+    qr/\.metalink$/,
+    qr/\.mirrorlist$/,
+    qr/\/Packages$/,
+    qr/\/Packages\.bz2$/,
+    qr/\/Packages\.gz$/,
+    qr/\/Packages\.lzma$/,
+    qr/\/Packages\.xz$/,
+    qr/\.png$/,
+    qr/\/Release$/,
+    qr/\.sha1$/,
+    qr/\.sha256$/,
+    qr/\.torrent$/,
+    qr/\.zsync$/,
+
+    # noise
+    qr/%/,
+    qr/&/,
+);
+
+# Returns 1 if $file matches any of the patterns above and is therefore not
+# counted, 0 if it is worth a closer look.
+sub is_uninteresting_file($)
+{
+    my $file = shift;
+
+    for my $pattern (@uninteresting_patterns) {
+	return 1 if ( $file =~ $pattern );
+    }
+
+    # is interesting ...
+    return 0;
+}
+
+# Classify a download by its file name.
+#
+# $filerec is a hash reference that gets filled in with:
+#   version  - version string taken from the file name
+#   product  - product / platform bucket, e.g. 'Win-x86' or 'Linux-x86-64'
+#   langpack - 1 for language packs and help packs, 0 otherwise
+# $file is the request path; only the part after the last '/' is looked at.
+#
+# Returns 1 if the download should be counted and 0 if it should be ignored.
+# Dies if $file does not end in a '/'-separated file name.
+sub characterise($$)
+{
+    my ($filerec, $file) = @_;
+
+    # currently based entirely on the filename
+    $file =~ m|/([^/]+)$| || die "not a filename: '$file'";
+    my $name = $1;
+
+    $name =~ s/BrOffice/LibO/; # BrOffice is obsolete
+    $name =~ s/-/_/g; # use underscores everywhere
+
+    my @elements = split(/_/, $name);
+
+    if (@elements < 2) {
+	print STDERR "Unknown filename '$name'\n";
+	return 0;
+    }
+    my $prod = $elements[0];
+
+    if ($prod eq 'LibO' &&
+	( $elements[1] eq 'SDK' || $elements[1] eq 'Dev' ) ) { # ignore sdk + dev-builds
+	return 0;
+
+    } elsif ( $prod eq 'LibreOfficePortableTest') {     # ignore test builds
+	return 0;
+
+    # Odd - legacy stuff
+    } elsif ($prod eq 'libreoffice' && (
+		 $name =~ m/\.tar\.gz$/ ||
+		 $name =~ m/\.tar\.bz2$/ ||
+		 $name =~ m/\.tar\.xz$/)) { # source
+	return 0;
+
+    # obsolete snafu
+    } elsif ($prod eq 'libo3.4.4' && $name =~ /\.iso$/) {
+	$filerec->{version} = '3.4.4';
+	$filerec->{product} = 'Win-dvd';
+
+    # LibreOffice portable
+    } elsif ($prod eq 'LibreOfficePortable') {
+	if ($name =~ m/(\d\.\d\.\d).*\.exe$/) {
+	    $filerec->{version} = $1;
+	    $filerec->{product} = 'Win-portable';
+	} else {
+	    print STDERR "Unknown portable version in '$name'\n";
+	    return 0;
+	}
+
+    # Bread and butter:
+    } elsif ($prod eq 'LibO' || $prod eq 'LibreOffice' ||
+	     $prod eq 'LO' || $prod eq 'LibOx') {
+	$filerec->{version} = $elements[1];
+
+	my $product;
+	if ($name =~ m/\.iso$/) {
+	    if ($name =~ m/allproducts/) {
+		$product = "All-dvd";
+	    } else {
+		$product = "Win-dvd";
+	    }
+	} elsif ($name =~ m/Win_x86/) {
+	    $product = "Win-x86";
+	# every '-' was turned into '_' above, so the 64 bit builds read
+	# 'x86_64' by now; test for them before the plain 'x86' case
+	} elsif ($name =~ m/Linux_x86_64/) {
+	    $product = "Linux-x86-64";
+	} elsif ($name =~ m/Linux_x86/) {
+	    $product = "Linux-x86";
+	} elsif ($name =~ m/MacOS_x86/) {
+	    $product = "Mac-x86";
+	} elsif ($name =~ m/MacOS_PPC/) {
+	    $product = "Mac-PPC";
+	} else {
+	    print STDERR "Unknown product for '$name'\n";
+	    # keep the download in the totals, but under an explicit label
+	    # instead of an undefined value that ends up as a hash key
+	    $product = "Unknown";
+	}
+	$filerec->{product} = $product;
+
+    } else {
+	print STDERR "Unknown initial element '$prod' of '$name'\n";
+	return 0;
+    }
+
+    # characterise helppacks and langpacks
+
+    $name =~ s/helppack/langpack/g; # destructive !
+    $filerec->{langpack} = 0;
+    if ($name =~ /_langpack_/ ) {
+	$filerec->{langpack} = 1;
+    }
+#    print STDERR "'$name' is a lang-pack: " . $filerec->{langpack} . "\n";
+
+    return 1;
+}
+
+# Parse one apache access log stream into a list of download records.
+#
+# $log is an open file handle that yields access log lines. A line is only
+# considered if it is a GET request answered with status 200 or 302, or with
+# a 206 that delivered the final part of the file, and if its canonicalised
+# path passes is_uninteresting_file() and characterise().
+#
+# Returns a list of hash references with the keys file, version, product,
+# langpack, date (weeks since the epoch) and pretty_date ("<ISO year>-w<ISO week>").
+sub parse_log($)
+{
+    my $log = shift;
+    my @files;
+
+    # in order to get a good representation of weeks at the start and end of the
+    # year (so that we don't get 1/2 of the data at the end, and 1/2 at the start
+    # of the next one), we use "epoch_week" - week since 1970-01-01 (1st week)
+    my $old_date = "";
+    my $epoch_week;
+    my %epoch_week_to_year;
+
+    while (<$log>) {
+	my $line = $_;
+	if ( $line =~ m/^([^ ]+) - - \[([^\/]+)\/([^\/]+)\/([^:]+):([0-9][0-9])[^\]]*\] "GET ([^"]*) HTTP\/[^"]*" ([0-9]+) ([0-9]+)/ ) {
+	    # $1 (client address) and $5 (hour) are matched but not needed
+	    my ( $day, $month, $year, $file, $status, $size ) = ( $2, $month_to_num{$3}, $4, $6, $7, $8 );
+
+	    # we are interested only in redirects and successful downloads
+	    next if ( $status != 302 && $status != 200 && $status != 206 );
+
+	    # partial download? - only count when it finished
+	    if ( $status == 206 )
+	    {
+		if ( $line =~ / size:([0-9]+) bytes=([0-9]+)-([0-9]*)$/ )
+		{
+		    my ( $wanted, $from ) = ( $1, $2 );
+		    # the range has to end exactly at the end of the file
+		    next if ( $wanted != $from + $size );
+		}
+		else {
+		    next;
+		}
+	    }
+
+	    # canonicalize
+	    $file =~ s/^\s+//;
+	    $file =~ s/\s+$//;
+	    $file =~ s/^http:\/\/download.documentfoundation.org//g;
+	    $file =~ s/\/\//\//g;
+	    $file =~ s/\?.*//g;
+	    $file =~ s/;jsessionid=.*//g;
+	    $file =~ s/\/libreoffice\/box\///g;
+	    $file =~ s/\/libreoffice\/old\///g;
+	    $file =~ s/\/libreoffice\/portable\///g;
+	    $file =~ s/\/libreoffice\/stable\///g;
+	    $file =~ s/\/libreoffice\/testing\///g;
+
+	    # not interesting path starts
+	    next if ( is_uninteresting_file ($file) );
+
+	    my %filerec;
+	    $filerec{file} = $file;
+
+	    next if ( ! characterise(\%filerec, $file) );
+
+	    # update the $epoch_week, if necessary
+	    if ( "$year-$month-$day" ne $old_date ) {
+		$old_date = "$year-$month-$day";
+
+		# noon of the day in question, as strftime arguments
+		my @noon = ( 0, 0, 12, $day, $month - 1, $year - 1900 ); # see the manual
+
+		# 1970-01-01 is Thursday, add 3 days (259200 seconds), and divide
+		my $seconds = POSIX::strftime( "%s", @noon );
+		$epoch_week = sprintf( "%d", ($seconds + 259200) / 604800 );
+
+		# remember the week. The ISO week number (%V) belongs to the ISO
+		# year (%G), not to the calendar year: 2012-12-31 is week 1 of
+		# 2013, and labelling it "2012-w01" would merge it with the
+		# first week of January 2012.
+		$epoch_week_to_year{$epoch_week} = POSIX::strftime( "%G-w%V", @noon );
+	    }
+
+	    $filerec{date} = $epoch_week;
+	    $filerec{pretty_date} = $epoch_week_to_year{$epoch_week};
+
+	    push @files, \%filerec;
+	}
+#	elsif ($verbose) { # don't touch a global variable it's bad news
+#	    print STDERR "invalid line in apache logs: '$line'\n";
+#	}
+    }
+
+    return @files;
+}
+
+# Parse compressed logs through an external decompressor.
+#
+# $type is the command that writes the decompressed logs to its stdout
+# ('bzcat' or 'zcat'); the remaining arguments are the files to read. The
+# files are handed to the command in batches of $bzcat_grouping.
+#
+# Returns the concatenated parse_log() records of all batches; dies if the
+# command cannot be started.
+sub parse_type($@)
+{
+    my $type = shift;
+    my @file_list = @_;
+    my @results;
+
+    while (@file_list) {
+	my @batch = splice (@file_list, 0, $bzcat_grouping);
+	# list form: the file names are passed to the command as they are,
+	# without a shell interpreting blanks or metacharacters in them
+	open (my $log, '-|', $type, @batch) || die "Can't '$type @batch': $!";
+	push @results, parse_log($log);
+	close $log;
+	print STDERR ".";
+    }
+
+    return @results;
+}
+
+# Parse a set of log files, compressed or not.
+#
+# $filelist is a reference to a list of paths. Files ending in .bz2 and .gz
+# are collected and read through bzcat and zcat respectively, everything
+# else is opened directly.
+#
+# Returns a reference to the list of records produced by parse_log();
+# dies if a plain file cannot be opened.
+sub parse_logs($)
+{
+    my $filelist = shift;
+    my @results;
+    my @bzipped;
+    my @gzipped;
+
+    for my $file (@{$filelist}) {
+	if ($file =~ m/\.bz2$/) {
+	    push @bzipped, $file;
+	} elsif ($file =~ m/\.gz$/) {
+	    push @gzipped, $file;
+	} else {
+	    # three argument open: the name is only ever used as a file to
+	    # read, whatever characters it starts or ends with
+	    open (my $log, '<', $file) || die "Can't open '$file': $!";
+	    push @results, parse_log($log);
+	    close $log;
+	}
+    }
+
+    push @results, parse_type('bzcat', @bzipped);
+    push @results, parse_type('zcat',  @gzipped);
+
+    return \@results;
+}
+
+# Fold a list of download records (as built by parse_log) into the global
+# totals. $list is a reference to the list of records.
+sub merge_results($)
+{
+    my $list = shift;
+
+    for my $rec (@{$list}) {
+
+	# helppacks and langpacks are left out of every count
+	next if ( $rec->{langpack} );
+
+	my ($path, $week, $version, $prod) =
+	    @{$rec}{qw(file pretty_date version product)};
+
+	# downloads per file
+	$all_files_list{$path}++;
+
+	# remember every product and version that was seen
+	$products{$prod} = 1;
+	$allversions{$version} = 1;
+
+	$total_downloads++;
+
+	# downloads per week, by product and by version; the nested
+	# entries come into existence on their first increment
+	$date_product_count{$week}{$prod}++;
+	$date_version_count{$week}{$version}++;
+    }
+}
+
+# Start a thread that runs parse_logs() on the given list reference and
+# return the thread object; the result is picked up with join().
+sub spawn_parse_log_thread($)
+{
+    my $files = shift;
+    my $worker = sub { return parse_logs($files); };
+    return threads->create( { 'context' => 'list' }, $worker );
+}
+
+# Command line: [-v|--verbose] [-c|--cpus N] [-u|--update] [path-to-log-tree]
+# Test for definedness, so that an argument like "0" does not end the loop.
+while (defined (my $arg = shift @ARGV)) {
+    if ($arg eq '-v' || $arg eq '--verbose') {
+	$verbose = 1;
+    } elsif ($arg eq '-c' || $arg eq '--cpus') {
+	$cpus_to_use = shift @ARGV;
+	# the count drives the loops that hand out the work, so refuse a
+	# missing or non-numeric value right here
+	if (!defined $cpus_to_use || $cpus_to_use !~ m/^[1-9][0-9]*$/) {
+	    die "'$arg' needs a positive integer argument";
+	}
+    } elsif ($arg eq '-u' || $arg eq '--update') {
+	$rsync_first = 1;
+    } elsif (!defined $path_to_log_tree) {
+	$path_to_log_tree = $arg;
+    } else {
+	die "Unknown parameter '$arg'";
+    }
+}
+
+# default to the "downloads" directory below the current directory
+if (!defined $path_to_log_tree) {
+    $path_to_log_tree = `pwd`;
+    chomp ($path_to_log_tree);
+    $path_to_log_tree = "$path_to_log_tree/downloads";
+}
+
+# update first - the sync has to run before the tree is scanned, otherwise
+# logs fetched (or deleted) by rsync are not reflected in the list of files
+# that this run is going to read
+if ($rsync_first) {
+    for my $site ('download.documentfoundation.org',
+		  'downloadarchive.documentfoundation.org') {
+	my $cmd = "rsync --delete -av bilbo.documentfoundation.org:/var/log/apache2/$site/ downloads/$site/ 1>&2";
+	if (system($cmd) != 0) {
+	    print STDERR "warning: rsync of the '$site' logs failed\n";
+	}
+    }
+}
+
+my @log_filenames = find_logs ($path_to_log_tree);
+if ($verbose) {
+    print STDERR "Have log paths of:\n\t" . (join("\n\t", @log_filenames)) . "\n";
+}
+
+# the slow piece - parsing the logs
+my $files_in = @log_filenames;
+my $parallel = $cpus_to_use;
+# never work with fewer than one bucket: with no bucket to fill, the loop
+# that hands out the files could not consume any of them and would spin forever
+if (!defined $parallel || $parallel !~ m/^[0-9]+$/ || $parallel < 1) {
+    $parallel = 1;
+}
+print STDERR "reading log data $files_in files:\n";
+
+if ($threaded) {
+    # divide up the work first: deal the files out round-robin.
+    my @thread_files;
+    for (my $i = 0; $i < $parallel; $i++) {
+	$thread_files[$i] = [];
+    }
+    my $slot = 0;
+    while (@log_filenames) {
+	push @{$thread_files[$slot]}, shift (@log_filenames);
+	$slot = ($slot + 1) % $parallel;
+    }
+
+    # one thread per bucket that actually got some files
+    my @threads;
+    for my $file_list (@thread_files) {
+	if (scalar (@{$file_list}) > 0) {
+	    push @threads, spawn_parse_log_thread ($file_list);
+	}
+    }
+
+    print STDERR "joining threads: ";
+    while (@threads) {
+	my $thread = shift @threads;
+	# a thread that died hands back nothing; skip it instead of
+	# dereferencing an undefined result
+	my ($results) = $thread->join();
+	if (defined $results) {
+	    merge_results($results);
+	} else {
+	    print STDERR "\nwarning: a log parsing thread returned no result\n";
+	}
+	print STDERR "joined";
+    }
+    print STDERR "\n";
+} else {
+    merge_results(parse_logs(\@log_filenames));
+}
+
+# qx() hands back the output of date including its trailing newline; strip it
+# so that it does not end up inside the text of the spreadsheet cell.
+my $generated_now = qx(date --rfc-3339=seconds);
+$generated_now = '' if (!defined $generated_now);
+chomp ($generated_now);
+my $generated_stamp = "Generated on: " . $generated_now;
+
+# ---------------------------------------------------------------------------------
+
+# now output this as a spreadsheet ... fods ... - this first chunk prints the document element with its namespaces, the styles, the complete "Graphs" sheet (generation stamp and total downloads) and the start of the "ProductData" sheet up to the "Date" header cell
+
+print << "EOF"
+<?xml version="1.0" encoding="UTF-8"?>
+<office:document xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0"
+                 xmlns:style="urn:oasis:names:tc:opendocument:xmlns:style:1.0"
+                 xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0"
+                 xmlns:table="urn:oasis:names:tc:opendocument:xmlns:table:1.0"
+                 xmlns:draw="urn:oasis:names:tc:opendocument:xmlns:drawing:1.0"
+                 xmlns:fo="urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0"
+                 xmlns:xlink="http://www.w3.org/1999/xlink"
+                 xmlns:dc="http://purl.org/dc/elements/1.1/"
+                 xmlns:meta="urn:oasis:names:tc:opendocument:xmlns:meta:1.0"
+                 xmlns:number="urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0"
+                 xmlns:presentation="urn:oasis:names:tc:opendocument:xmlns:presentation:1.0"
+                 xmlns:svg="urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0"
+                 xmlns:chart="urn:oasis:names:tc:opendocument:xmlns:chart:1.0"
+                 xmlns:dr3d="urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0"
+                 xmlns:math="http://www.w3.org/1998/Math/MathML"
+                 xmlns:form="urn:oasis:names:tc:opendocument:xmlns:form:1.0"
+                 xmlns:script="urn:oasis:names:tc:opendocument:xmlns:script:1.0"
+                 xmlns:config="urn:oasis:names:tc:opendocument:xmlns:config:1.0"
+                 xmlns:ooo="http://openoffice.org/2004/office"
+                 xmlns:ooow="http://openoffice.org/2004/writer"
+                 xmlns:oooc="http://openoffice.org/2004/calc"
+                 xmlns:dom="http://www.w3.org/2001/xml-events"
+                 xmlns:xforms="http://www.w3.org/2002/xforms"
+                 xmlns:xsd="http://www.w3.org/2001/XMLSchema"
+                 xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+                 xmlns:rpt="http://openoffice.org/2005/report"
+                 xmlns:of="urn:oasis:names:tc:opendocument:xmlns:of:1.2"
+                 xmlns:xhtml="http://www.w3.org/1999/xhtml"
+                 xmlns:grddl="http://www.w3.org/2003/g/data-view#"
+                 xmlns:tableooo="http://openoffice.org/2009/table"
+                 xmlns:field="urn:openoffice:names:experimental:ooo-ms-interop:xmlns:field:1.0"
+                 xmlns:formx="urn:openoffice:names:experimental:ooxml-odf-interop:xmlns:form:1.0"
+                 xmlns:css3t="http://www.w3.org/TR/css3-text/"
+                 office:version="1.2"
+                 grddl:transformation="http://docs.oasis-open.org/office/1.2/xslt/odf2rdf.xsl"
+                 office:mimetype="application/vnd.oasis.opendocument.spreadsheet">
+   <office:styles>
+      <number:date-style style:name="isodatenum">
+         <number:year number:style="long"/>
+         <number:text>-</number:text>
+         <number:month number:style="long"/>
+         <number:text>-</number:text>
+         <number:day number:style="long"/>
+      </number:date-style>
+      <number:percentage-style style:name="percent-number">
+        <number:number number:decimal-places="2" number:min-integer-digits="1"/>
+        <number:text>%</number:text>
+      </number:percentage-style>
+      <style:style style:name="boldheader" style:family="table-cell" style:parent-style-name="Default">
+         <style:text-properties fo:font-style="italic" fo:font-weight="bold"/>
+      </style:style>
+      <style:style style:name="isodate" style:family="table-cell" style:parent-style-name="Default"/>
+      <style:style style:name="percent" style:family="table-cell" style:parent-style-name="Default"
+       style:data-style-name="percent-number"/>
+   </office:styles>
+   <office:body>
+      <office:spreadsheet>
+         <table:table table:name="Graphs">
+            <table:table-row>
+               <table:table-cell table:style-name="boldheader" office:value-type="string">
+                  <text:p>$generated_stamp</text:p>
+               </table:table-cell>
+            </table:table-row>
+            <table:table-row>
+               <table:table-cell table:style-name="boldheader" office:value-type="string">
+                  <text:p>Total downloads:</text:p>
+               </table:table-cell>
+            </table:table-row>
+            <table:table-row>
+               <table:table-cell table:style-name="boldheader" office:value-type="float"
+                office:value="$total_downloads"/>
+            </table:table-row>
+	 </table:table>
+         <table:table table:name="ProductData">
+            <table:table-row>
+               <table:table-cell table:style-name="boldheader" office:value-type="string">
+                  <text:p>Date</text:p>
+               </table:table-cell>
+EOF
+;
+
+# ---------------------------------------------------------------------------------
+
+# By Product sheet: finishes the "ProductData" header row (one column per product plus "Total"), then writes one row per week, a "Total" row and a "Percent" row
+
+my @prods = sort keys %products; # fixes the order of the product columns
+for my $product (@prods) {
+print << "EOF"
+               <table:table-cell table:style-name="boldheader" office:value-type="string">
+                  <text:p>$product</text:p>
+               </table:table-cell>
+EOF
+	    ;
+}
+print << "EOF"
+               <table:table-cell table:style-name="boldheader" office:value-type="string">
+                  <text:p>Total</text:p>
+               </table:table-cell>
+            </table:table-row>
+EOF
+;
+
+my $row = 1; # spreadsheet row number; row 1 is the header row just written
+
+my $colcount = @prods; # number of product columns
+my $colname = coltoref ($colcount); # letter of the last product column (column A holds the date)
+# print STDERR "cols: $colcount - colname $colname @prods\n";
+
+for my $date (sort keys %date_product_count) {
+print << "EOF"
+            <table:table-row>
+EOF
+;
+    print_date_cell("isodate", $date);
+    for my $product (@prods) {
+	my $count = $date_product_count{$date}->{$product};
+	$count = 0 if (!defined $count); # no download of this product in this week
+print << "EOF"
+               <table:table-cell office:value-type="float" office:value="$count"/>
+EOF
+;
+    }
+    $row++; # now the number of the row being written, used by the SUM below
+print << "EOF"
+               <table:table-cell table:formula="of:=SUM([.B$row:.$colname$row])" office:value-type="float"/>
+            </table:table-row>
+EOF
+;
+}
+
+# Summary / formulae: one SUM per product column and one for the Total column, over rows 2 to $row (the last week written)
+{
+    print << "EOF"
+            <table:table-row>
+               <table:table-cell table:style-name="boldheader" office:value-type="string">
+                  <text:p>Total</text:p>
+               </table:table-cell>
+EOF
+    ;
+    my $col;
+    for ($col = 1; $col <= $colcount + 1; $col++) {
+	my $ref = coltoref ($col);
+	print ("               <table:table-cell table:formula=\"of:=SUM([.$ref"."2:.$ref$row])\" office:value-type=\"float\"/>\n");
+    }
+
+print << "EOF"
+            </table:table-row>
+EOF
+    ;
+}
+
+# Summary as %ages ... - every cell of the Total row divided by the grand total, then the sheet is closed
+
+{
+    print << "EOF"
+            <table:table-row>
+               <table:table-cell table:style-name="boldheader" office:value-type="string">
+                  <text:p>Percent</text:p>
+               </table:table-cell>
+EOF
+    ;
+    my $col;
+    $row++; # $row is now the number of the Total row written above
+    my $totalref = coltoref($colcount + 1) . "$row"; # cell in the Total row and the Total column
+    for ($col = 1; $col <= $colcount + 1; $col++) {
+	my $ref = coltoref ($col);
+	print ("               <table:table-cell table:style-name=\"percent\" table:formula=\"of:=[.$ref$row]/[.$totalref]\" office:value-type=\"percentage\"/>\n");
+    }
+
+print << "EOF"
+            </table:table-row>
+         </table:table>
+EOF
+    ;
+}
+
+# ---------------------------------------------------------------------------------
+
+# By version sheet
+
+# First collapse trivial / invalid versions - under 0.2%
+my @todelete = ();
+my $threshold = (2 * $total_downloads) / 1000;
+for my $version (keys %allversions) {
+    # downloads of this version over all weeks
+    my $total = 0;
+    for my $per_week (values %date_version_count) {
+	my $count = $per_week->{$version};
+	$total += $count if (defined $count);
+    }
+    next if ($total >= $threshold);
+
+    # too few: move the weekly counts over into the catch-all 'invalid'
+    push @todelete, $version;
+    for my $per_week (values %date_version_count) {
+	my $count = $per_week->{$version};
+	$per_week->{'invalid'} += $count if (defined $count);
+    }
+}
+delete @allversions{@todelete};
+$allversions{'invalid'} = 1 if (@todelete); # so we get the result
+
+# The "Versions" sheet: a header row with one column per version, followed
+# by one row of download counts per week.
+my @version_columns = sort keys %allversions;
+
+print << "EOF"
+         <table:table table:name="Versions">
+            <table:table-row>
+               <table:table-cell table:style-name="boldheader" office:value-type="string">
+                  <text:p>Date</text:p>
+               </table:table-cell>
+EOF
+;
+for my $version (@version_columns) {
+    # version strings are cut out of requested file names, so escape the
+    # XML special characters before writing them into the document
+    my $label = $version;
+    $label =~ s/&/&amp;/g;
+    $label =~ s/</&lt;/g;
+    $label =~ s/>/&gt;/g;
+print << "EOF"
+               <table:table-cell table:style-name="boldheader" office:value-type="string">
+                  <text:p>$label</text:p>
+               </table:table-cell>
+EOF
+    ;
+}
+print << "EOF"
+            </table:table-row>
+EOF
+;
+
+for my $date (sort keys %date_version_count) {
+print << "EOF"
+            <table:table-row>
+EOF
+;
+    print_date_cell("isodate", $date);
+    for my $ver (@version_columns) {
+	my $count = $date_version_count{$date}->{$ver};
+	$count = 0 if (!defined $count); # no download of this version in this week
+print << "EOF"
+               <table:table-cell office:value-type="float" office:value="$count"/>
+EOF
+;
+    }
+print << "EOF"
+            </table:table-row>
+EOF
+;
+}
+
+print << "EOF"
+         </table:table>
+EOF
+;
+
+# ---------------------------------------------------------------------------------
+
+#   misc. debugging / information: the "Files" sheet lists every counted
+#   file with its number of downloads, most downloaded first
+
+print << "EOF"
+         <table:table table:name="Files">
+            <table:table-row>
+               <table:table-cell table:style-name="boldheader" office:value-type="string">
+                  <text:p>Name</text:p>
+               </table:table-cell>
+               <table:table-cell table:style-name="boldheader" office:value-type="string">
+                  <text:p>count</text:p>
+               </table:table-cell>
+            </table:table-row>
+EOF
+;
+
+for my $file (sort { $all_files_list{$b} <=> $all_files_list{$a} } keys %all_files_list) {
+    my $count = $all_files_list{$file};
+    # the names come from the request line of the access log, so escape the
+    # XML special characters before writing them into the document
+    my $name = $file;
+    $name =~ s/&/&amp;/g;
+    $name =~ s/</&lt;/g;
+    $name =~ s/>/&gt;/g;
+print << "EOF"
+            <table:table-row>
+               <table:table-cell office:value-type="string">
+                  <text:p>$name</text:p>
+               </table:table-cell>
+               <table:table-cell office:value-type="float" office:value="$count"/>
+            </table:table-row>
+EOF
+;
+}
+
+print << "EOF"
+	 </table:table>
+EOF
+;
+
+# ---------------------------------------------------------------------------------
+
+# end of spreadsheet ... - closes the office:spreadsheet, office:body and office:document elements opened by the first print of the output
+
+print << "EOF"
+      </office:spreadsheet>
+   </office:body>
+</office:document>
+EOF
+;
commit 81bff03148f8273376da7ac3c6ded1e62a1f1696
Author: Michael Meeks <michael.meeks at suse.com>
Date:   Wed Apr 3 16:42:29 2013 +0100

    remove old download-stats generation.

diff --git a/scripts/download-stats.pl b/scripts/download-stats.pl
deleted file mode 100755
index 13e906c..0000000
--- a/scripts/download-stats.pl
+++ /dev/null
@@ -1,704 +0,0 @@
-#!/usr/bin/perl -w
-#
-# This script parses, and interprets the output from:
-#   pg_dump -a downloadstats -f stats_counter -F plain -t stats_counter
-# on a mirrorbrain server, thus:
-#   cat stats_counter | ./dlstats.pl --libo --weekify > /tmp/output.fods
-#
-# It also parses the mirrorbrain data from an Apache server ...
-# wget http://download.services.openoffice.org/stats/csv/201201.csv # etc ...
-# cat 201201.csv 201202.csv 201203.csv | dlstats.pl --csv > /tmp/output.fods
-#
-# It also parses raw sql reload dumps
-#
-use strict;
-use Date::Parse;
-use POSIX qw(strftime);
-
-# First a massive database of population statistics
-# source: the CIA:
-# https://www.cia.gov/library/publications/the-world-factbook/rankorder/2119rank.html
-my %locale_to_population = (
-#	Location => Citizens # Comment
-	'cn' => 1343239923,
-	'in' => 1205073612,
-	'us' => 313847465,
-	'id' => 248645008,
-	'br' => 199321413,
-	'pk' => 190291129, # Pakistan
-	'ng' => 170123740, # Nigeria
-	'bd' => 161083804, # Bangladesh
-	'ru' => 142517670,
-	'jp' => 127368088,
-	'mx' => 114975406,
-	'ph' => 103775002,
-	'vn' => 91519289, # Vietnam
-	'eg' => 83688164, # Egypt
-	'de' => 81305856,
-	'tr' => 79749461,
-	'ir' => 78868711, # Iran
-	'th' => 67091089,
-	'fr' => 65630692,
-	'gb' => 63047162,
-	'it' => 61261254,
-	'lt' => 61261254,
-	'kr' => 48860500,
-	'za' => 48810427,
-	'es' => 47042984,
-	'co' => 45239079,
-	'ua' => 44854065,
-	'ke' => 43013341, # Kenya
-	'ar' => 42192494,
-	'pl' => 38415284,
-	'dz' => 37367226, # Algeria
-	'ca' => 34300083,
-	'ma' => 32309239, # Morocco
-	'pe' => 29549517, # Peru
-	'my' => 29179952, # Malaysia
-	've' => 28047938,
-	'sa' => 26534504, # Saudi Arabia
-	'tw' => 23234936,
-	'au' => 22015576,
-	'ro' => 21848504,
-	'lk' => 21481334, # Sri Lanka
-	'kz' => 17522010, # Kazakhstan
-	'cl' => 17067369,
-	'nl' => 16730632,
-	'ec' => 15223680, # Ecuador
-	'gt' => 14099032, # Guatemala
-	'cu' => 11075244, # Vcuba
-	'pt' => 10781459,
-	'gr' => 10767827,
-	'tn' => 10732900, # Tunisia
-	'be' => 10438353,
-	'bo' => 10290003, # Bolivia
-	'cz' => 10177300,
-	'do' => 10088598, # Dominican Republic
-	'hu' => 9958453,
-	'by' => 9643566, # Belarus
-	'se' => 9103788,
-	'hn' => 8296693, # Honduras
-	'at' => 8219743,
-	'ch' => 7925517,
-	'il' => 7590758, # Israel
-	'rs' => 7276604,
-	'hk' => 7153519,
-	'bg' => 7037935, # Bulgaria
-	'py' => 6541591, # Paraguay
-	'sv' => 6090646, # El Salvador
-	'ni' => 5727707, # Nicaragua
-	'dk' => 5543453,
-	'sk' => 5483088,
-	'sg' => 5353494, # Singapore
-	'ae' => 5314317, # United Arab Emirates
-	'fi' => 5262930,
-	'ie' => 4722028, # Republic of Ireland
-	'no' => 4707270,
-	'cr' => 4636348, # Costa Rica
-	'hr' => 4480043, # Croatia
-	'nz' => 4327944,
-	'ba' => 3879296, # Bosnia and Herzegovina
-	'pr' => 3690923, # Puerto Rico
-	'md' => 3656843, # Moldova
-	'pa' => 3510045, # Panama
-	'uy' => 3316328, # Uruguay
-	'jm' => 2889187, # Jamaica
-	'kw' => 2646314, # Kuwait
-	'lv' => 2191580, # Latvia
-	'mk' => 2082370, # Republic of Macedonia
-	'si' => 1996617, # Slovenia
-	'ee' => 1274709, # Estonia
-	'tt' => 1226383, # Republic of Trinidad and Tobago
-	'cy' => 1138071, # Cyprus
-	'lu' => 509074,  # Luxembourg
-	'mt' => 409836,  # Malta
-	'mq' => 403795,  # Martinique
-	'is' => 313183,  # Iceland
-	'nc' => 260166,  # New Caledonia
-    );
-
-# segment by Date, then by Product, then count
-my %data;
-my %products;
-my %prod_names;
-my %byregion;
-my %countries;
-my %byversion;
-my %allversions;
-my $total_downloads = 0;
-my $crunch_langs;
-my $top_n_countries = 10;
-my %by_date_country;
-my $weekify = 0;
-my %weekified_dates;
-
-# FIXME: ODF is -incredibly- lame in this regard ... we badly want R1C1 style referencing here [!]
-sub coltoref($)
-{
-    my $col = shift;
-    die ("odff needs R1C1") if ($col > 25);
-    return chr (ord('A') + $col);
-}
-
-sub print_date_cell($$)
-{
-    my ($style,$date) = @_;
-    if (!$weekify) {
-print << "EOF"
-            <table:table-cell table:style-name="$style" office:value-type="date" office:date-value="$date"/>
-EOF
-;
-    } else {
-print << "EOF"
-            <table:table-cell table:style-name="$style" office:value-type="string">
-                  <text:p>$date</text:p>
-	    </table:table-cell>
-EOF
-;
-    }
-}
-
-my $log_format;
-my $country_match;
-for my $arg (@ARGV) {
-    die "pass --csv --libo or --sql and/or --weekify and/or --country=NN" if ($arg eq '--help' || $arg eq '-h');
-    if ($arg eq '--csv' || $arg eq '-c') {
-	$log_format = 'c';
-    } elsif ($arg eq '--libo' || $arg eq -'l') {
-	$log_format = 'l';
-    } elsif ($arg eq '--sql' || $arg eq -'s') {
-	$log_format = 's';
-    } elsif ($arg eq '--weekify') {
-	$weekify = 1;
-    } elsif ($arg =~ m/--country=(\S+)$/) {
-	$country_match = $1;
-    } else {
-	die "Unknown argument '$arg'";
-    }
-}
-defined $log_format || die "you must pass a format type";
-# select the format you want
-
-print STDERR "Log format: $log_format\n";
-
-# Analysing stats:
-#
-# grep for 'multi' - yields the Windows installer ... (also grep for 'all_lang') - all of them [!]
-# grep for 'Linux' and 'en-US' yields total Linux main binary downloads ...
-# grep for 'Mac' and 'en-US' yields total Mac main binary numbers ...
-
-# Main accumulation loop: read one pre-aggregated download record per line
-# from STDIN, parse it according to $log_format ('l', 'c' or 's'), classify
-# it as a 'product' or a 'lang pack' and add its count to the tables the
-# spreadsheet output is generated from.
-#
-# Tables filled here: %prod_names, %weekified_dates, %byversion,
-# %allversions, %by_date_country, %byregion, %countries, %products, %data
-# and the scalar $total_downloads.
-while (<STDIN>) {
-    chomp();
-    my $line = $_;
-    my ($id, $date, $product, $osname, $version, $lang, $country, $count);
-
-    $line =~ s/[\s\r\n]*$//;
-#    print STDERR "line '$line'\n";
-
-    my $type;          # 'product' or 'lang pack'
-    my $clean_product; # normalised product name; rows without one only count towards %byregion
-
-    if ($log_format eq 'l' && # a database dump from mirrorbrain:
-#    17424    2011-01-25      LibO    Win-x86    3.3.0    all_lang    qa    1
-        $line =~ m/^\s*(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s*$/) {
-        ($id, $date, $product, $osname, $version, $lang, $country, $count) = ($1, $2, $3, $4, $5, $6, $7, $8);
-
-#        print "$count downloads on date $date, os $osname $lang\n";
-
-        # Records dated before 2011-01-25 are dropped.
-        if ($date lt '2011-01-25') {
-#            print STDERR "ignoring $date\n";
-            next;
-        }
-
-        $prod_names{$product} = 1;
-        # Ignore Mac / Linux help packs etc.
-        if (($osname =~ /Linux/ || $osname =~ /MacOS/) && $lang eq 'en-US') {
-            $clean_product = $osname;
-        }
-        # Detect Windows distinctions
-        if (($product eq 'LibO' || $product eq 'LO' || $product eq 'BrOffice') &&
-            $osname =~ /Win/ && ($lang =~ /multi/ || $lang =~ /all_lang/)) {
-            $clean_product = "$osname-$lang";
-        }
-
-        # Detect PortableOffice distinctions
-        if ($product eq 'LibreOfficePortable') {
-            $clean_product = "Portable";
-        }
-
-        # Detect DVD image
-        if ($product eq 'LibO-DVD') {
-            $clean_product = "DVD";
-        }
-
-        # Count product downloads by region
-        if (defined $clean_product) {
-            $type = 'product';
-        } else {
-#            print STDERR "lang pack line '$line' => '$lang' '$country' '$count'\n";
-            $type = 'lang pack';
-        }
-
-    } elsif ($log_format eq 'c' &&
-#    2012-01-03,OOo,3.3.0,Linux_x86-64_langpack-deb,as,se,1
-             $line =~ m/^\s*(\S+),(\S+),(\S+),(\S+),(\S+),(\S+),(\d+)\s*$/) {
-        my $project;
-        ($date, $project, $version, $product, $lang, $country, $count) = ($1, $2, $3, $4, $5, $6, $7);
-
-        # Derive $type and $clean_product from the product column.
-        if ($project =~ m/SDK/i) {
-            $product = 'sdk';
-        }
-        if ($product =~ m/langpack/i) {
-            $type = 'lang pack';
-        } else {
-            $type = 'product';
-        }
-        $prod_names{$product} = 1;
-
-        # Strip the packaging suffixes so variants of one platform share a column.
-        $clean_product = $product;
-        $clean_product =~ s/_langpack//;
-        $clean_product =~ s/_install//;
-        $clean_product =~ s/-wJRE//;
-        $clean_product =~ s/-deb//;
-        $clean_product =~ s/-rpm//;
-
-#        print STDERR "$count downloads of $clean_product on date $date, of $lang from $country\n";
-    } elsif ($log_format eq 's') {
-#       INSERT INTO `clean_downloads` (`date`, `product`, `os`, `language`, `version`, `downloads`) VALUES
-#       ('2008-12-21', 'OpenOffice.org', 'winwjre', 'es', '3.0.0Ã¯Â¿Â½', 1),
-        if ( $line =~ m/^\('(\S+)',\s*'(\S+)',\s*'(\S+)',\s*'(\S+)',\s*'([^']+)',\s*(\d+)\s*\)[,;]\s*$/) {
-            my $project;
-            # The `os` column lands in $product and the `language` column in $country.
-            ($date, $project, $product, $country, $version, $count) = ($1, $2, $3, $4, $5, $6);
-
-            if ($product =~ m/langpack/) {
-                $type = 'lang pack';
-            } else {
-                $type = 'product';
-            }
-
-            if ($product =~ m/linux/i) {
-                $clean_product = "linux";
-            } elsif ($product =~ m/win/i) {
-                $clean_product = "win";
-            } elsif ($product =~ m/mac/i) {
-                $clean_product = "mac";
-            } elsif ($product =~ m/solar/i) {
-                $clean_product = "solaris";
-            } else {
-                $clean_product = "other";
-            }
-            if ($product =~ m/64/) {
-                $clean_product = $clean_product . "64";
-            }
-
-            # Normalise the version to three numeric components where possible.
-            if ($version =~ m/^(\d+\.\d+\.\d+)/) {
-                $version = $1;
-            } elsif ($version =~ m/^(\d+\.\d+)/) {
-                $version = $1 . ".0";
-            } elsif ($version =~ m/^(\d+)/) {
-                $version = $1 . ".0.0";
-            } elsif ($version =~ /^(...\d\d\d_m\d+)/) {
-                $version = $1;
-            } else {
-#                print STDERR "invalid version: '$version'\n";
-                $version = 'invalid';
-            }
-
-#            print STDERR "$count downloads of $product on date $date, from $country\n";
-# INSERT INTO `clean_downloads_summary` (`product`, `os`, `language`, `date`, `downloads`, `smooth_downloads`) VALUES'
-#       ('BrOffice.org', 'linuxintel', 'pt-BR', '2008-10-13', 155, 155),
-        } elsif ( $line =~ m/^\('(\S+)',\s*'(\S+)',\s*'(\S+)',\s*'(\S+)',\s*(\d+),\s*(\d+)\s*\)[,;]\s*$/) {
-            # Summary row: four quoted fields followed by two bare numbers.
-            # odd - duplicate data ? ignore it ... explicitly, so that nothing
-            # below ever runs with undefined fields.
-            next;
-        } else {
-#            print STDERR "malformed sql line '$line'\n";
-            next;
-        }
-    } else {
-        print STDERR "malformed line '$line'\n";
-        next;
-    }
-
-    # Optional filter: keep only records whose country matches the pattern.
-    if (defined $country_match) {
-        $country =~ m/$country_match/ || next;
-    }
-
-    if ($weekify) {
-        if (!defined $weekified_dates{$date}) {
-            # Parse and format in the same (GMT) zone so the calendar day
-            # cannot shift with the local timezone.
-            my @time = gmtime (str2time($date . "T01:01:01.000001", 'GMT'));
-            # %G is the ISO 8601 year that the week number %V belongs to; the
-            # calendar year would turn e.g. 2012-01-01 into "2012-52".
-            $weekified_dates{$date} = POSIX::strftime("%G-%V", @time); # 2012-01 (ISO year + week number)
-        }
-#        print STDERR "weekify date '$date' to '".$weekified_dates{$date}."'\n";
-        $date = $weekified_dates{$date};
-    }
-
-    # Accumulate versions by date for products
-    if ($type eq 'product') {
-        my $norc_ver = $version; # remove 3.4.2-1 (rc1) type versions
-        $norc_ver =~ s/\-\d$//;
-        $allversions{$norc_ver} = 1;
-        $byversion{$date}->{$norc_ver} += $count;
-
-        $by_date_country{$date}->{$country} += $count;
-    }
-
-    # Per-type ('product' / 'lang pack') totals by country.
-    $byregion{$type}->{$country} += $count;
-    $countries{$country} = 1;
-
-    if (!defined $clean_product) {
-#        print "uninteresting line '$line'\n";
-        next;
-    }
-
-    $total_downloads += $count;
-
-    # Per-date totals by cleaned product name.
-    $products{$clean_product} = 1;
-    $data{$date}->{$clean_product} += $count;
-#    print "count for '$date' and '$clean_product' == $data{$date}->{$clean_product} [ added $count ]\n";
-}
-
-# List every raw product name recorded in %prod_names, sorted, on STDERR.
-print STDERR "Dirty product names:\n";
-print STDERR "\t$_\n" foreach (sort keys %prod_names);
-
-# All countries seen, ordered by descending number of product downloads.
-# A country with only language pack downloads has no 'product' entry, so
-# treat a missing count as 0 instead of comparing undef.
-my @countries_by_product;
-@countries_by_product = sort {
-    ($byregion{'product'}->{$b} || 0) <=> ($byregion{'product'}->{$a} || 0)
-} keys %countries;
-
-# The leading $top_n_countries entries of that ranking.
-my @top_countries = @countries_by_product;
-@top_countries = splice(@top_countries, 0, $top_n_countries);
-
-# now output this as a spreadsheet ... fods ...
-#
-# Prints the fixed start of the document on STDOUT: the XML declaration, the
-# <office:document> root with its namespace declarations, the cell styles
-# ("boldheader", "isodate"), an empty "Graphs" table, and the opening of the
-# "ProductData" table up to and including the "Date" header cell.  The header
-# row is left open for the statements that follow.  The heredoc is
-# single-quoted, so nothing in it is interpolated.
-print << 'EOF'
-<?xml version="1.0" encoding="UTF-8"?>
-<office:document xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0"
-                 xmlns:style="urn:oasis:names:tc:opendocument:xmlns:style:1.0"
-                 xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0"
-                 xmlns:table="urn:oasis:names:tc:opendocument:xmlns:table:1.0"
-                 xmlns:draw="urn:oasis:names:tc:opendocument:xmlns:drawing:1.0"
-                 xmlns:fo="urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0"
-                 xmlns:xlink="http://www.w3.org/1999/xlink"
-                 xmlns:dc="http://purl.org/dc/elements/1.1/"
-                 xmlns:meta="urn:oasis:names:tc:opendocument:xmlns:meta:1.0"
-                 xmlns:number="urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0"
-                 xmlns:presentation="urn:oasis:names:tc:opendocument:xmlns:presentation:1.0"
-                 xmlns:svg="urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0"
-                 xmlns:chart="urn:oasis:names:tc:opendocument:xmlns:chart:1.0"
-                 xmlns:dr3d="urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0"
-                 xmlns:math="http://www.w3.org/1998/Math/MathML"
-                 xmlns:form="urn:oasis:names:tc:opendocument:xmlns:form:1.0"
-                 xmlns:script="urn:oasis:names:tc:opendocument:xmlns:script:1.0"
-                 xmlns:config="urn:oasis:names:tc:opendocument:xmlns:config:1.0"
-                 xmlns:ooo="http://openoffice.org/2004/office"
-                 xmlns:ooow="http://openoffice.org/2004/writer"
-                 xmlns:oooc="http://openoffice.org/2004/calc"
-                 xmlns:dom="http://www.w3.org/2001/xml-events"
-                 xmlns:xforms="http://www.w3.org/2002/xforms"
-                 xmlns:xsd="http://www.w3.org/2001/XMLSchema"
-                 xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-                 xmlns:rpt="http://openoffice.org/2005/report"
-                 xmlns:of="urn:oasis:names:tc:opendocument:xmlns:of:1.2"
-                 xmlns:xhtml="http://www.w3.org/1999/xhtml"
-                 xmlns:grddl="http://www.w3.org/2003/g/data-view#"
-                 xmlns:tableooo="http://openoffice.org/2009/table"
-                 xmlns:field="urn:openoffice:names:experimental:ooo-ms-interop:xmlns:field:1.0"
-                 xmlns:formx="urn:openoffice:names:experimental:ooxml-odf-interop:xmlns:form:1.0"
-                 xmlns:css3t="http://www.w3.org/TR/css3-text/"
-                 office:version="1.2"
-                 grddl:transformation="http://docs.oasis-open.org/office/1.2/xslt/odf2rdf.xsl"
-                 office:mimetype="application/vnd.oasis.opendocument.spreadsheet">
-   <office:styles>
-      <number:date-style style:name="isodatenum">
-         <number:year number:style="long"/>
-         <number:text>-</number:text>
-         <number:month number:style="long"/>
-         <number:text>-</number:text>
-         <number:day number:style="long"/>
-      </number:date-style>
-      <style:style style:name="boldheader" style:family="table-cell" style:parent-style-name="Default">
-         <style:text-properties fo:font-style="italic" fo:font-weight="bold"/>
-      </style:style>
-      <style:style style:name="isodate" style:family="table-cell" style:parent-style-name="Default"/> <!-- style:data-style-name="isodatenum" -->
-   </office:styles>
-   <office:body>
-      <office:spreadsheet>
-         <table:table table:name="Graphs">
-	 </table:table>
-         <table:table table:name="ProductData">
-            <table:table-row>
-               <table:table-cell table:style-name="boldheader" office:value-type="string">
-                  <text:p>Date</text:p>
-               </table:table-cell>
-EOF
-;
-# Rest of the ProductData header row: one bold title cell per product
-# (sorted by name), then a final "Total" title, then the row is closed.
-my @prods = sort keys %products;
-foreach my $title (@prods) {
-    print <<"EOF";
-               <table:table-cell table:style-name="boldheader" office:value-type="string">
-                  <text:p>$title</text:p>
-               </table:table-cell>
-EOF
-}
-print <<"EOF";
-               <table:table-cell table:style-name="boldheader" office:value-type="string">
-                  <text:p>Total</text:p>
-               </table:table-cell>
-            </table:table-row>
-EOF
-
-# Row 1 is the header row; $row is bumped once per date so that it always
-# holds the sheet row number of the data row being written.
-my $row = 1;
-
-# Column A holds the date, so the last of the $colcount product columns is
-# the one coltoref($colcount) names.
-my $colcount = scalar @prods;
-my $colname = coltoref($colcount);
-print STDERR "cols: $colcount - colname $colname @prods\n";
-
-# One row per date (sorted): the date, a count per product (0 when there is
-# none) and a SUM formula over the product cells of that row.
-foreach my $day (sort keys %data) {
-    print <<"EOF";
-            <table:table-row>
-EOF
-    print_date_cell("isodate", $day);
-    foreach my $name (@prods) {
-        my $n = $data{$day}->{$name};
-        $n = 0 unless (defined $n);
-        print <<"EOF";
-               <table:table-cell office:value-type="float" office:value="$n"/>
-EOF
-    }
-    $row++;
-    print <<"EOF";
-               <table:table-cell table:formula="of:=SUM([.B$row:.$colname$row])" office:value-type="float"/>
-            </table:table-row>
-EOF
-}
-
-# Summary / formulae: after an empty cell under the date column, one SUM
-# formula per product column plus one for the Total column, each covering
-# sheet rows 2 .. $row.
-{
-    print <<"EOF";
-            <table:table-row>
-               <table:table-cell/>
-EOF
-    foreach my $idx (1 .. $colcount + 1) {
-        my $letter = coltoref($idx);
-        printf("               <table:table-cell table:formula=\"of:=SUM([.%s2:.%s%s])\" office:value-type=\"float\"/>\n",
-               $letter, $letter, $row);
-    }
-    print <<"EOF";
-            </table:table-row>
-EOF
-}
-
-# Summary as %ages ...: a last row dividing each cell of the summary row by
-# the summary row's Total cell, then the ProductData table is closed.
-{
-    print <<"EOF";
-            <table:table-row>
-               <table:table-cell/>
-EOF
-    $row++; # $row now addresses the summary row
-    my $grand = coltoref($colcount + 1) . $row;
-    foreach my $idx (1 .. $colcount + 1) {
-        my $letter = coltoref($idx);
-        print "               <table:table-cell table:formula=\"of:=[.$letter$row]/[.$grand]\" office:value-type=\"float\"/>\n";
-    }
-    print <<"EOF";
-            </table:table-row>
-         </table:table>
-EOF
-}
-
-# LangData sheet: for each location (in @countries_by_product order) the
-# 'product' total next to the 'lang pack' total from %byregion, 0 where a
-# location has no entry.
-
-print <<"EOF";
-         <table:table table:name="LangData">
-            <table:table-row>
-               <table:table-cell table:style-name="boldheader" office:value-type="string">
-                  <text:p>Location</text:p>
-               </table:table-cell>
-               <table:table-cell table:style-name="boldheader" office:value-type="string">
-                  <text:p>product</text:p>
-               </table:table-cell>
-               <table:table-cell table:style-name="boldheader" office:value-type="string">
-                  <text:p>lang-pack</text:p>
-               </table:table-cell>
-            </table:table-row>
-EOF
-foreach my $place (@countries_by_product) {
-    my $prod_total = defined $byregion{'product'}->{$place}
-        ? 0 + $byregion{'product'}->{$place} : 0;
-    my $pack_total = defined $byregion{'lang pack'}->{$place}
-        ? 0 + $byregion{'lang pack'}->{$place} : 0;
-    print <<"EOF";
-            <table:table-row>
-               <table:table-cell office:value-type="string"><text:p>$place</text:p></table:table-cell>
-               <table:table-cell office:value-type="float" office:value="$prod_total"/>
-               <table:table-cell office:value-type="float" office:value="$pack_total"/>
-            </table:table-row>
-EOF
-}
-
-print <<"EOF";
-         </table:table>
-EOF
-
-# By version sheet
-
-# First collapse trivial / invalid versions - under 0.1% - particularly for the sql dbase
-#
-# Every version whose total over all dates is below a thousandth of
-# $total_downloads has its per-date counts moved into the 'invalid' bucket
-# and loses its own column.  The bucket itself is never collapsed (folding
-# it into itself would double its counts and then drop them), and it is
-# registered in %allversions whenever something was folded into it, so the
-# moved counts still get a column.
-my @todelete = ();
-my $threshold = (defined $total_downloads ? $total_downloads : 0) / 1000;
-for my $version (keys %allversions) {
-    next if ($version eq 'invalid');
-
-    my $total = 0;
-    for my $date (keys %byversion) {
-        my $count = $byversion{$date}->{$version};
-        $total += $count if (defined $count);
-    }
-    next if ($total >= $threshold);
-
-    print STDERR "collapsing trivial version '$version' count $total into 'invalid'\n";
-    push @todelete, $version;
-    for my $date (keys %byversion) {
-        my $count = delete $byversion{$date}->{$version};
-        $byversion{$date}->{'invalid'} += $count if (defined $count);
-    }
-}
-# %allversions is only modified once the iteration over its keys is done.
-for my $version (@todelete) {
-    delete $allversions{$version};
-}
-$allversions{'invalid'} = 1 if (@todelete);
-
-# Versions sheet: a "Date" title followed by one title per remaining version
-# (sorted), then one row per date (sorted) with that date's count for each
-# version, 0 where there is none.
-{
-    my @vers = sort keys %allversions;
-
-    print <<"EOF";
-         <table:table table:name="Versions">
-            <table:table-row>
-               <table:table-cell table:style-name="boldheader" office:value-type="string">
-                  <text:p>Date</text:p>
-               </table:table-cell>
-EOF
-    foreach my $title (@vers) {
-        print <<"EOF";
-               <table:table-cell table:style-name="boldheader" office:value-type="string">
-                  <text:p>$title</text:p>
-               </table:table-cell>
-EOF
-    }
-    print <<"EOF";
-            </table:table-row>
-EOF
-
-    foreach my $day (sort keys %byversion) {
-        print <<"EOF";
-            <table:table-row>
-EOF
-        print_date_cell("isodate", $day);
-        foreach my $ver (@vers) {
-            my $n = $byversion{$day}->{$ver};
-            $n = 0 unless (defined $n);
-            print <<"EOF";
-               <table:table-cell office:value-type="float" office:value="$n"/>
-EOF
-        }
-        print <<"EOF";
-            </table:table-row>
-EOF
-    }
-
-    print <<"EOF";
-         </table:table>
-EOF
-}
-
-# Language product download / comparison sheet ...
-# TopDownloadLocations: a "Date" title followed by one title per entry of
-# @top_countries, then one row per date in %data (sorted) with that date's
-# %by_date_country count for each of those locations, 0 where there is none.
-print <<"EOF";
-         <table:table table:name="TopDownloadLocations">
-            <table:table-row>
-               <table:table-cell table:style-name="boldheader" office:value-type="string">
-                  <text:p>Date</text:p>
-               </table:table-cell>
-EOF
-
-foreach my $place (@top_countries) {
-    print <<"EOF";
-               <table:table-cell table:style-name="boldheader" office:value-type="string">
-                  <text:p>$place</text:p>
-               </table:table-cell>
-EOF
-}
-print <<"EOF";
-            </table:table-row>
-EOF
-
-foreach my $day (sort keys %data) {
-    print <<"EOF";
-            <table:table-row>
-EOF
-    print_date_cell("isodate", $day);
-    foreach my $place (@top_countries) {
-        my $n = $by_date_country{$day}->{$place};
-        $n = 0 unless (defined $n);
-        print <<"EOF";
-               <table:table-cell office:value-type="float" office:value="$n"/>
-EOF
-    }
-    print <<"EOF";
-            </table:table-row>
-EOF
-}
-print <<"EOF";
-         </table:table>
-EOF
-
-# end of spreadsheet ...
-# Close the spreadsheet, body and document elements opened by the prologue.
-
-print <<'EOF';
-      </office:spreadsheet>
-   </office:body>
-</office:document>
-EOF