[Libreoffice-commits] dev-tools.git: 2 commits - scripts/download-stats.pl
Michael Meeks
michael.meeks at suse.com
Wed Apr 3 08:43:57 PDT 2013
scripts/download-stats.pl | 905 +++++++++++++++++++++++++---------------------
1 file changed, 501 insertions(+), 404 deletions(-)
New commits:
commit c78dcc2843b733c7ff237dea178d7599f9d1f730
Author: Michael Meeks <michael.meeks at suse.com>
Date: Wed Apr 3 16:42:47 2013 +0100
add apache log analysis tool to generate download stats.
Kendy wrote at least the hardest half.
diff --git a/scripts/download-stats.pl b/scripts/download-stats.pl
new file mode 100755
index 0000000..3e128e0
--- /dev/null
+++ b/scripts/download-stats.pl
@@ -0,0 +1,801 @@
+#!/usr/bin/perl -w
+
+use strict;
+use threads ('yield',
+ 'stack_size' => 64*4096,
+ 'exit' => 'threads_only',
+ 'stringify');
+use POSIX qw(strftime);
+
+my $verbose = 0;
+my $rsync_first = 0;
+my $cpus_to_use = 32; # level of parallelism
+my $bzcat_grouping = 10; # files to pass to bzcat at once
+my $path_to_log_tree;
+my $threaded = 1;
+
+my %month_to_num = (
+ 'Jan' => '01',
+ 'Feb' => '02',
+ 'Mar' => '03',
+ 'Apr' => '04',
+ 'May' => '05',
+ 'Jun' => '06',
+ 'Jul' => '07',
+ 'Aug' => '08',
+ 'Sep' => '09',
+ 'Oct' => '10',
+ 'Nov' => '11',
+ 'Dec' => '12',
+);
+
+# Aggregated results; filled in by merge_results().
+my %products;            # product name => 1 (set of products seen)
+my %allversions;         # version string => 1 (set of versions seen)
+my %all_files_list;      # canonicalised file path => download count
+my %date_product_count;  # pretty date => { product => count }
+my %date_version_count;  # pretty date => { version => count }
+# Start from zero: the value is interpolated into the spreadsheet and used
+# in arithmetic later on, even when not a single log line was counted.
+my $total_downloads = 0;
+
+# FIXME: ODF is -incredibly- lame in this regard ... we badly want R1C1 style referencing here [!]
+sub coltoref($)
+{
+    # Turn a zero-based column index into its one-letter spreadsheet
+    # reference (0 => 'A').  Indexes above 25 have no single letter, so die.
+    my ($index) = @_;
+    if ($index > 25) {
+        die ("odff needs R1C1");
+    }
+    my $first_letter = ord('A');
+    return chr ($first_letter + $index);
+}
+
+sub print_date_cell($$)
+{
+    # Print one string-typed table cell holding a date label.
+    #   $cell_style - name of the cell style to reference
+    #   $label      - text to show; sadly not truly a date but a
+    #                 year + ISO week number, hence the string type
+    my ($cell_style, $label) = @_;
+    my $cell = << "EOF";
+ <table:table-cell table:style-name="$cell_style" office:value-type="string">
+ <text:p>$label</text:p>
+ </table:table-cell>
+EOF
+    print $cell;
+}
+
+# Recursively collect the access logs found below a path.
+#
+# Parameter: $path - a file or a directory
+# Returns a list of log file paths:
+#   - for a plain file, the path itself when it matches
+#     documentfoundation.org ... access_log, otherwise nothing
+#   - for a directory, everything found below it; entries whose name
+#     starts with a dot are skipped
+# Dies when a directory cannot be opened.
+sub find_logs($);
+sub find_logs($)
+{
+    my $path = shift;
+
+    if (-f $path) {
+        return unless ($path =~ m/documentfoundation\.org.*access_log/);
+        print STDERR "hit: $path\n" if ($verbose);
+        return $path;
+    }
+
+    my $dirh;
+    my @logfiles;
+    opendir ($dirh, $path) || die "can't open '$path': $!";
+    while (my $name = readdir ($dirh)) {
+        next if ($name =~ m/^\./);
+        push @logfiles, find_logs("$path/$name");
+    }
+    # handles from opendir() have to be released with closedir();
+    # close() does not work on them
+    closedir ($dirh);
+
+    return @logfiles;
+}
+
+# Patterns describing request paths that must not be counted as downloads.
+my @uninteresting_patterns = (
+    # empty, odd-looking or excluded path starts
+    qr/^$/,
+    qr/^{/,
+    qr/^%/,
+    qr/^debian-repo\/testing\//,
+    qr/^\/libreoffice\/old\//,
+    qr/^\/libreoffice\/src\//,
+    qr/^\/robots\.txt$/,
+    qr/\/index\.php$/,
+    qr/\/a\.sh$/,
+    qr/^\/TIMESTAMP/,
+
+    # ignore source
+    qr|/src/|,
+
+    # ignore android remote
+    qr/ImpressRemote.apk$/,
+
+    # anywhere
+    qr/\/customer_testimonials.php/,
+
+    # anything that is missing an extension (directory names, metafiles) and slash
+    qr/\/[^.\/]+$/,
+    qr/^[^\/]+$/,
+
+    # not interesting extensions
+    qr/\/$/,
+    qr/\?C=[MNS];O=[AD]$/,
+    qr/\.asc$/,
+    qr/\.btih$/,
+    qr/\.css$/,
+    qr/\/favicon\.ico$/,
+    qr/\.gif$/,
+    qr/\.gpg$/,
+    qr/\.html$/,
+    qr/\.info\.php$/,
+    qr/\.log$/,
+    qr/\.magnet$/,
+    qr/\.md5$/,
+    qr/\.meta4$/,
+    qr/\.metalink$/,
+    qr/\.mirrorlist$/,
+    qr/\/Packages$/,
+    qr/\/Packages\.bz2$/,
+    qr/\/Packages\.gz$/,
+    qr/\/Packages\.lzma$/,
+    qr/\/Packages\.xz$/,
+    qr/\.png$/,
+    qr/\/Release$/,
+    qr/\.sha1$/,
+    qr/\.sha256$/,
+    qr/\.torrent$/,
+    qr/\.zsync$/,
+
+    # noise
+    qr/%/,
+    qr/&/,
+);
+
+# Decide whether a canonicalised request path should be ignored.
+# Returns 1 when any of the patterns above matches, 0 when the file is
+# interesting.
+sub is_uninteresting_file($)
+{
+    my ($candidate) = @_;
+
+    foreach my $pattern (@uninteresting_patterns) {
+        return 1 if ( $candidate =~ $pattern );
+    }
+
+    # is interesting ...
+    return 0;
+}
+
+# Work out product, version and lang-pack status from a download's file name.
+#
+# Parameters:
+#   $filerec - hash ref; on success its 'version', 'product' and 'langpack'
+#              keys are filled in
+#   $file    - request path; must contain a '/' followed by a file name,
+#              otherwise the function dies
+# Returns 1 when the file is to be counted, 0 when it is deliberately
+# ignored or cannot be recognised (the latter is reported on STDERR).
+sub characterise($$)
+{
+    my ($filerec, $file) = @_;
+
+    # currently based entirely on the filename
+    $file =~ m|/([^/]+)$| || die "not a filename: '$file'";
+    my $name = $1;
+
+    $name =~ s/BrOffice/LibO/; # BrOffice is obsolete
+    $name =~ s/-/_/g;          # use underscores everywhere
+
+    my @elements = split(/_/, $name);
+
+    if (@elements < 2) {
+        print STDERR "Unknown filename '$name'\n";
+        return 0;
+    }
+    my $prod = $elements[0];
+
+    if ($prod eq 'LibO' &&
+        ( $elements[1] eq 'SDK' || $elements[1] eq 'Dev' ) ) { # ignore sdk + dev-builds
+        return 0;
+
+    } elsif ( $prod eq 'LibreOfficePortableTest') { # ignore test builds
+        return 0;
+
+    # Odd - legacy stuff
+    } elsif ($prod eq 'libreoffice' && (
+                 $name =~ m/\.tar\.gz$/ ||
+                 $name =~ m/\.tar\.bz2$/ ||
+                 $name =~ m/\.tar\.xz$/)) { # source
+        return 0;
+
+    # obsolete snafu
+    } elsif ($prod eq 'libo3.4.4' && $name =~ /\.iso$/) {
+        $filerec->{version} = '3.4.4';
+        $filerec->{product} = 'Win-dvd';
+
+    # LibreOffice portable
+    } elsif ($prod eq 'LibreOfficePortable') {
+        if ($name =~ m/(\d\.\d\.\d).*\.exe$/) {
+            $filerec->{version} = $1;
+            $filerec->{product} = 'Win-portable';
+        } else {
+            print STDERR "Unknown portable version in '$name'\n";
+            return 0;
+        }
+
+    # Bread and butter:
+    } elsif ($prod eq 'LibO' || $prod eq 'LibreOffice' ||
+             $prod eq 'LO' || $prod eq 'LibOx') {
+        $filerec->{version} = $elements[1];
+
+        my $product;
+        if ($name =~ m/\.iso$/) {
+            if ($name =~ m/allproducts/) {
+                $product = "All-dvd";
+            } else {
+                $product = "Win-dvd";
+            }
+        } elsif ($name =~ m/Win_x86/) {
+            $product = "Win-x86";
+        } elsif ($name =~ m/Linux_x86_64/) {
+            # every '-' in $name was turned into '_' above, so the 64-bit
+            # marker reads "x86_64" here; it has to be tested before the
+            # plain "Linux_x86" prefix below
+            $product = "Linux-x86-64";
+        } elsif ($name =~ m/Linux_x86/) {
+            $product = "Linux-x86";
+        } elsif ($name =~ m/MacOS_x86/) {
+            $product = "Mac-x86";
+        } elsif ($name =~ m/MacOS_PPC/) {
+            $product = "Mac-PPC";
+        } else {
+            # do not count a download under an undefined product
+            print STDERR "Unknown product for '$name'\n";
+            return 0;
+        }
+        $filerec->{product} = $product;
+
+    } else {
+        print STDERR "Unknown initial element '$prod' of '$name'\n";
+        return 0;
+    }
+
+    # characterise helppacks and langpacks
+
+    $name =~ s/helppack/langpack/g; # destructive !
+    $filerec->{langpack} = 0;
+    if ($name =~ /_langpack_/ ) {
+        $filerec->{langpack} = 1;
+    }
+#   print STDERR "'$name' is a lang-pack: " . $filerec->{langpack} . "\n";
+
+    return 1;
+}
+
+# Parse one apache access log stream into a list of download records.
+#
+# Parameter: $log - an open, readable file handle
+# Returns a list of hash refs, one per counted download, with the keys
+#   file                       - canonicalised request path
+#   version, product, langpack - as filled in by characterise()
+#   date                       - week number counted from 1970-01-01
+#   pretty_date                - "<ISO week-based year>-w<ISO week number>"
+# Lines that are not GET requests in the expected format, have a status
+# other than 200/206/302, are unfinished partial downloads, or name an
+# uninteresting or unrecognised file are skipped silently.
+sub parse_log($)
+{
+    my $log = shift;
+    my @files;
+
+    # in order to get a good representation of weeks at the start and end of the
+    # year (so that we don't get 1/2 of the data at the end, and 1/2 at the start
+    # of the next one), we use "epoch_week" - week since 1970-01-01 (1st week)
+    my $old_date = "";
+    my $epoch_week;
+    my %epoch_week_to_year;
+
+    while (<$log>) {
+        my $line = $_;
+
+        # invalid lines are not reported: that would need the global
+        # $verbose, and touching a global variable here is bad news
+        next unless ( $line =~ m/^([^ ]+) - - \[([^\/]+)\/([^\/]+)\/([^:]+):([0-9][0-9])[^\]]*\] "GET ([^"]*) HTTP\/[^"]*" ([0-9]+) ([0-9]+)/ );
+        my ( $ip, $day, $month, $year, $hour, $file, $status, $size ) = ( $1, $2, $month_to_num{$3}, $4, $5, $6, $7, $8 );
+
+        # we are interested only in redirects and successful downloads
+        next if ( $status != 302 && $status != 200 && $status != 206 );
+
+        # partial download? - only count when it finished, i.e. when the
+        # range start plus the bytes sent reaches the logged total size
+        if ( $status == 206 ) {
+            next unless ( $line =~ / size:([0-9]+) bytes=([0-9]+)-([0-9]*)$/ );
+            my ( $wanted, $from ) = ( $1, $2 );
+            next if ( $wanted != $from + $size );
+        }
+
+        # canonicalize
+        $file =~ s/^\s+//;
+        $file =~ s/\s+$//;
+        $file =~ s/^http:\/\/download\.documentfoundation\.org//g;
+        $file =~ s/\/\//\//g;
+        $file =~ s/\?.*//g;
+        $file =~ s/;jsessionid=.*//g;
+        $file =~ s/\/libreoffice\/box\///g;
+        $file =~ s/\/libreoffice\/old\///g;
+        $file =~ s/\/libreoffice\/portable\///g;
+        $file =~ s/\/libreoffice\/stable\///g;
+        $file =~ s/\/libreoffice\/testing\///g;
+
+        # not interesting path starts
+        next if ( is_uninteresting_file ($file) );
+
+        my %filerec;
+        $filerec{file} = $file;
+
+        next if ( ! characterise(\%filerec, $file) );
+
+        # update the $epoch_week, if necessary
+        if ( "$year-$month-$day" ne $old_date ) {
+            $old_date = "$year-$month-$day";
+
+            # 1970-01-01 is Thursday, add 3 days (259200 seconds), and divide
+            my $seconds = POSIX::strftime( "%s", 0, 0, 12, $day, $month - 1, $year - 1900 ); # see the manual
+            $epoch_week = sprintf( "%d", ($seconds + 259200) / 604800 );
+
+            # remember the week.  %V is the ISO 8601 week number, so it has
+            # to be paired with the ISO week-based year %G rather than the
+            # calendar year: 2012-12-31 belongs to week 1 of 2013, and
+            # labelling it "2012-w01" would merge it with January 2012.
+            $epoch_week_to_year{$epoch_week} =
+                POSIX::strftime( "%G-w%V", 0, 0, 12, $day, $month - 1, $year - 1900 );
+        }
+
+        $filerec{date} = $epoch_week;
+        $filerec{pretty_date} = $epoch_week_to_year{$epoch_week};
+
+        push @files, \%filerec;
+    }
+
+    return @files;
+}
+
+# Run compressed logs through an external decompressor and parse the output.
+#
+# Parameters:
+#   $type      - command to run; it is given the file names as arguments and
+#                must write the decompressed logs to its standard output
+#   @file_list - paths of the compressed log files
+# Returns the concatenated parse_log() records of all files.  The files are
+# handed to the command in groups of at most $bzcat_grouping; one dot is
+# printed on STDERR per group.  Dies when the command cannot be started.
+sub parse_type($@)
+{
+    my $type = shift;
+    my @file_list = @_;
+    my @results;
+
+    while (@file_list) {
+        my @batch = splice (@file_list, 0, $bzcat_grouping);
+
+        # list-form pipe open: the command is run directly, without a shell,
+        # so file names with spaces or shell metacharacters arrive intact
+        open (my $log, '-|', $type, @batch) || die "Can't '$type @batch': $!";
+        push @results, parse_log($log);
+        close $log;
+        print STDERR ".";
+    }
+
+    return @results;
+}
+
+# Parse a set of log files, whatever their compression.
+#
+# Parameter: $filelist - array ref of log file paths
+# Returns an array ref of parse_log() records.  Files ending in .bz2 go
+# through bzcat and files ending in .gz through zcat (see parse_type());
+# everything else is read directly.  Dies when a plain file cannot be opened.
+sub parse_logs($)
+{
+    my $filelist = shift;
+    my @results;
+    my @bzipped;
+    my @gzipped;
+
+    for my $file (@{$filelist}) {
+        if ($file =~ m/\.bz2$/) {
+            push @bzipped, $file;
+        } elsif ($file =~ m/\.gz$/) {
+            push @gzipped, $file;
+        } else {
+            # three-argument open: the name is always taken as a plain path
+            # to read, whatever characters it starts or ends with
+            open (my $log, '<', $file) || die "Can't open '$file': $!";
+            push @results, parse_log($log);
+            close $log;
+        }
+    }
+
+    push @results, parse_type('bzcat', @bzipped);
+    push @results, parse_type('zcat', @gzipped);
+
+    return \@results;
+}
+
+# Fold a list of download records into the global statistics.
+#
+# Parameter: $list - array ref of records as produced by parse_log()
+# Updates %all_files_list, %products, %allversions, $total_downloads,
+# %date_product_count and %date_version_count.
+sub merge_results($)
+{
+    my ($list) = @_;
+
+    RECORD:
+    foreach my $record (@{$list}) {
+
+        # helppacks and langpacks are left out of every statistic
+        next RECORD if ( $record->{langpack} );
+
+        # one more download of this very file
+        my $path = $record->{file};
+        $all_files_list{$path}++;
+
+        my $week    = $record->{pretty_date};
+        my $version = $record->{version};
+        my $prod    = $record->{product};
+
+        # remember every product and version that occurred
+        $products{$prod} = 1;
+        $allversions{$version} = 1;
+
+        $total_downloads++;
+
+        # per-week counters, by product and by version; missing entries
+        # spring into existence and count up from nothing
+        $date_product_count{$week}{$prod}++;
+        $date_version_count{$week}{$version}++;
+    }
+}
+
+# Start a thread that runs parse_logs() on the given array ref of file
+# names; returns whatever threads->create() returns.
+sub spawn_parse_log_thread($)
+{
+    my $files = shift;
+    my $worker = sub { return parse_logs($files); };
+    return threads->create( { 'context' => 'list' }, $worker );
+}
+
+# Command line: [-v|--verbose] [-c|--cpus N] [-u|--update] [path-to-log-tree]
+# Testing for definedness keeps an argument of "0" from ending the loop early.
+while (defined (my $arg = shift @ARGV)) {
+    if ($arg eq '-v' || $arg eq '--verbose') {
+        $verbose = 1;
+    } elsif ($arg eq '-c' || $arg eq '--cpus') {
+        $cpus_to_use = shift @ARGV;
+        # without at least one bucket the work-splitting below has nowhere
+        # to put the files
+        die "--cpus needs a positive integer"
+            if (!defined $cpus_to_use || $cpus_to_use !~ m/^[1-9][0-9]*$/);
+    } elsif ($arg eq '-u' || $arg eq '--update') {
+        $rsync_first = 1;
+    } elsif (!defined $path_to_log_tree) {
+        $path_to_log_tree = $arg;
+    } else {
+        die "Unknown parameter '$arg'";
+    }
+}
+
+# default log tree: ./downloads below the current directory
+if (!defined $path_to_log_tree) {
+    $path_to_log_tree = `pwd`;
+    chomp ($path_to_log_tree);
+    $path_to_log_tree = "$path_to_log_tree/downloads";
+}
+
+# update first - this has to happen before the tree is scanned, otherwise
+# freshly fetched logs are not picked up and a first run finds no tree at all.
+# Note that rsync always writes below ./downloads, whatever path was given.
+if ($rsync_first) {
+    system('rsync --delete -av bilbo.documentfoundation.org:/var/log/apache2/download.documentfoundation.org/ downloads/download.documentfoundation.org/ 1>&2');
+    system('rsync --delete -av bilbo.documentfoundation.org:/var/log/apache2/downloadarchive.documentfoundation.org/ downloads/downloadarchive.documentfoundation.org/ 1>&2');
+}
+
+my @log_filenames = find_logs ($path_to_log_tree);
+if ($verbose) {
+    print STDERR "Have log paths of:\n\t" . (join("\n\t", @log_filenames)) . "\n";
+}
+
+# the slow piece - parsing the logs
+my $files_in = @log_filenames;
+my $parallel = $cpus_to_use;
+print STDERR "reading log data $files_in files:\n";
+
+if ($threaded) {
+    # divide up the work first: deal the files out round-robin, one list
+    # per potential thread
+    my @thread_files = map { [] } (1 .. $parallel);
+    my $slot = 0;
+    for my $file (@log_filenames) {
+        push @{$thread_files[$slot]}, $file;
+        $slot = ($slot + 1) % $parallel;
+    }
+
+    # only start threads that actually have something to do
+    my @threads;
+    for my $file_list (@thread_files) {
+        if (scalar (@{$file_list}) > 0) {
+            push @threads, spawn_parse_log_thread ($file_list);
+        }
+    }
+
+    print STDERR "joining threads: ";
+    while (@threads) {
+        my $thread = shift @threads;
+        merge_results($thread->join());
+        print STDERR "joined";
+    }
+    print STDERR "\n";
+} else {
+    merge_results(parse_logs(\@log_filenames));
+}
+
+my $generated_stamp = "Generated on: " . qx(date --rfc-3339=seconds);
+
+# ---------------------------------------------------------------------------------
+
+# now output this as a spreadsheet ... fods ...
+
+# Document prologue: the namespace declarations, the number and cell styles
+# referenced by the sheets, the complete "Graphs" sheet (generation stamp and
+# total download count) and the opening of the "ProductData" header row,
+# which is left open for the per-product column titles printed next.
+print << "EOF"
+<?xml version="1.0" encoding="UTF-8"?>
+<office:document xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0"
+ xmlns:style="urn:oasis:names:tc:opendocument:xmlns:style:1.0"
+ xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0"
+ xmlns:table="urn:oasis:names:tc:opendocument:xmlns:table:1.0"
+ xmlns:draw="urn:oasis:names:tc:opendocument:xmlns:drawing:1.0"
+ xmlns:fo="urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0"
+ xmlns:xlink="http://www.w3.org/1999/xlink"
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
+ xmlns:meta="urn:oasis:names:tc:opendocument:xmlns:meta:1.0"
+ xmlns:number="urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0"
+ xmlns:presentation="urn:oasis:names:tc:opendocument:xmlns:presentation:1.0"
+ xmlns:svg="urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0"
+ xmlns:chart="urn:oasis:names:tc:opendocument:xmlns:chart:1.0"
+ xmlns:dr3d="urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0"
+ xmlns:math="http://www.w3.org/1998/Math/MathML"
+ xmlns:form="urn:oasis:names:tc:opendocument:xmlns:form:1.0"
+ xmlns:script="urn:oasis:names:tc:opendocument:xmlns:script:1.0"
+ xmlns:config="urn:oasis:names:tc:opendocument:xmlns:config:1.0"
+ xmlns:ooo="http://openoffice.org/2004/office"
+ xmlns:ooow="http://openoffice.org/2004/writer"
+ xmlns:oooc="http://openoffice.org/2004/calc"
+ xmlns:dom="http://www.w3.org/2001/xml-events"
+ xmlns:xforms="http://www.w3.org/2002/xforms"
+ xmlns:xsd="http://www.w3.org/2001/XMLSchema"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xmlns:rpt="http://openoffice.org/2005/report"
+ xmlns:of="urn:oasis:names:tc:opendocument:xmlns:of:1.2"
+ xmlns:xhtml="http://www.w3.org/1999/xhtml"
+ xmlns:grddl="http://www.w3.org/2003/g/data-view#"
+ xmlns:tableooo="http://openoffice.org/2009/table"
+ xmlns:field="urn:openoffice:names:experimental:ooo-ms-interop:xmlns:field:1.0"
+ xmlns:formx="urn:openoffice:names:experimental:ooxml-odf-interop:xmlns:form:1.0"
+ xmlns:css3t="http://www.w3.org/TR/css3-text/"
+ office:version="1.2"
+ grddl:transformation="http://docs.oasis-open.org/office/1.2/xslt/odf2rdf.xsl"
+ office:mimetype="application/vnd.oasis.opendocument.spreadsheet">
+ <office:styles>
+ <number:date-style style:name="isodatenum">
+ <number:year number:style="long"/>
+ <number:text>-</number:text>
+ <number:month number:style="long"/>
+ <number:text>-</number:text>
+ <number:day number:style="long"/>
+ </number:date-style>
+ <number:percentage-style style:name="percent-number">
+ <number:number number:decimal-places="2" number:min-integer-digits="1"/>
+ <number:text>%</number:text>
+ </number:percentage-style>
+ <style:style style:name="boldheader" style:family="table-cell" style:parent-style-name="Default">
+ <style:text-properties fo:font-style="italic" fo:font-weight="bold"/>
+ </style:style>
+ <style:style style:name="isodate" style:family="table-cell" style:parent-style-name="Default"/>
+ <style:style style:name="percent" style:family="table-cell" style:parent-style-name="Default"
+ style:data-style-name="percent-number"/>
+ </office:styles>
+ <office:body>
+ <office:spreadsheet>
+ <table:table table:name="Graphs">
+ <table:table-row>
+ <table:table-cell table:style-name="boldheader" office:value-type="string">
+ <text:p>$generated_stamp</text:p>
+ </table:table-cell>
+ </table:table-row>
+ <table:table-row>
+ <table:table-cell table:style-name="boldheader" office:value-type="string">
+ <text:p>Total downloads:</text:p>
+ </table:table-cell>
+ </table:table-row>
+ <table:table-row>
+ <table:table-cell table:style-name="boldheader" office:value-type="float"
+ office:value="$total_downloads"/>
+ </table:table-row>
+ </table:table>
+ <table:table table:name="ProductData">
+ <table:table-row>
+ <table:table-cell table:style-name="boldheader" office:value-type="string">
+ <text:p>Date</text:p>
+ </table:table-cell>
+EOF
+;
+
+# ---------------------------------------------------------------------------------
+
+# By Product sheet
+
+# Finish the "ProductData" header row: one title cell per product, in sorted
+# order, followed by a "Total" column.
+my @prods = sort keys %products;
+for my $product (@prods) {
+print << "EOF"
+ <table:table-cell table:style-name="boldheader" office:value-type="string">
+ <text:p>$product</text:p>
+ </table:table-cell>
+EOF
+ ;
+}
+print << "EOF"
+ <table:table-cell table:style-name="boldheader" office:value-type="string">
+ <text:p>Total</text:p>
+ </table:table-cell>
+ </table:table-row>
+EOF
+;
+
+# $row is the 1-based spreadsheet row written last; row 1 is the header.
+my $row = 1;
+
+# Column A holds the date and the products start at B, so the letter for
+# index $colcount is the last product column.
+my $colcount = @prods;
+my $colname = coltoref ($colcount);
+# print STDERR "cols: $colcount - colname $colname @prods\n";
+
+# One row per date: the label, one count per product (0 when there were no
+# downloads) and a formula summing the product cells of that row.
+for my $date (sort keys %date_product_count) {
+print << "EOF"
+ <table:table-row>
+EOF
+;
+ print_date_cell("isodate", $date);
+ for my $product (@prods) {
+ my $count = $date_product_count{$date}->{$product};
+ $count = 0 if (!defined $count);
+print << "EOF"
+ <table:table-cell office:value-type="float" office:value="$count"/>
+EOF
+;
+ }
+ # advance first, so that the formula refers to the row being written
+ $row++;
+print << "EOF"
+ <table:table-cell table:formula="of:=SUM([.B$row:.$colname$row])" office:value-type="float"/>
+ </table:table-row>
+EOF
+;
+}
+
+# Summary / formulae
+{
+ print << "EOF"
+ <table:table-row>
+ <table:table-cell table:style-name="boldheader" office:value-type="string">
+ <text:p>Total</text:p>
+ </table:table-cell>
+EOF
+ ;
+ # one SUM per product column plus one for the "Total" column, each
+ # covering row 2 up to the last data row ($row)
+ my $col;
+ for ($col = 1; $col <= $colcount + 1; $col++) {
+ my $ref = coltoref ($col);
+ print (" <table:table-cell table:formula=\"of:=SUM([.$ref"."2:.$ref$row])\" office:value-type=\"float\"/>\n");
+ }
+
+print << "EOF"
+ </table:table-row>
+EOF
+ ;
+}
+
+# Summary as %ages ...
+
+{
+ print << "EOF"
+ <table:table-row>
+ <table:table-cell table:style-name="boldheader" office:value-type="string">
+ <text:p>Percent</text:p>
+ </table:table-cell>
+EOF
+ ;
+ my $col;
+ # $row now names the "Total" row written just above; every percentage is
+ # that row's column total divided by its cell in the "Total" column
+ $row++;
+ my $totalref = coltoref($colcount + 1) . "$row";
+ for ($col = 1; $col <= $colcount + 1; $col++) {
+ my $ref = coltoref ($col);
+ print (" <table:table-cell table:style-name=\"percent\" table:formula=\"of:=[.$ref$row]/[.$totalref]\" office:value-type=\"percentage\"/>\n");
+ }
+
+print << "EOF"
+ </table:table-row>
+ </table:table>
+EOF
+ ;
+}
+
+# ---------------------------------------------------------------------------------
+
+# By version sheet
+
+# First collapse trivial / invalid versions - under 0.2%
+# Versions whose download count over all dates stays below the threshold
+# (2/1000 of all downloads) are folded into a single 'invalid' pseudo
+# version, date by date.
+my @todelete = ();
+my $threshold = (2 * $total_downloads) / 1000;
+for my $version (keys %allversions) {
+ # total downloads of this version over all dates
+ my $total = 0;
+ for my $date (keys %date_version_count) {
+ my $count = $date_version_count{$date}->{$version};
+ $count = 0 if(!defined $count);
+ $total = $total + $count;
+ }
+ if ($total < $threshold) {
+# print STDERR "collapsing trivial version '$version' count $total into 'invalid'\n";
+ push @todelete, $version;
+ # move its per-date counts over to 'invalid'
+ for my $date (keys %date_version_count) {
+ my $count = $date_version_count{$date}->{$version};
+ if (defined $count) {
+ if (!defined $date_version_count{$date}->{'invalid'}) {
+ $date_version_count{$date}->{'invalid'} = $count;
+ } else {
+ $date_version_count{$date}->{'invalid'} += $count;
+ }
+ }
+ }
+ }
+}
+# Drop the collapsed versions from the column list only after the loop above
+# has finished iterating over %allversions.
+if (@todelete) {
+ for my $version (@todelete) {
+ delete $allversions{$version};
+ }
+ $allversions{'invalid'} = 1; # so we get the result
+}
+
+# "Versions" sheet: a header row with one column per remaining version, then
+# one row per date with the count for each version (0 when there is none).
+print << "EOF"
+ <table:table table:name="Versions">
+ <table:table-row>
+ <table:table-cell table:style-name="boldheader" office:value-type="string">
+ <text:p>Date</text:p>
+ </table:table-cell>
+EOF
+;
+for my $version (sort keys %allversions) {
+print << "EOF"
+ <table:table-cell table:style-name="boldheader" office:value-type="string">
+ <text:p>$version</text:p>
+ </table:table-cell>
+EOF
+ ;
+}
+print << "EOF"
+ </table:table-row>
+EOF
+ ;
+ for my $date (sort keys %date_version_count) {
+print << "EOF"
+ <table:table-row>
+EOF
+;
+ print_date_cell("isodate", $date);
+ for my $ver (sort keys %allversions) {
+ my $count = $date_version_count{$date}->{$ver};
+ $count = 0 if(!defined $count);
+print << "EOF"
+ <table:table-cell office:value-type="float" office:value="$count"/>
+EOF
+;
+ }
+print << "EOF"
+ </table:table-row>
+EOF
+;
+ }
+
+print << "EOF"
+ </table:table>
+EOF
+ ;
+
+# ---------------------------------------------------------------------------------
+
+# misc. debugging / information
+
+# "Files" sheet: every counted file with its number of downloads.
+print << "EOF"
+ <table:table table:name="Files">
+ <table:table-row>
+ <table:table-cell table:style-name="boldheader" office:value-type="string">
+ <text:p>Name</text:p>
+ </table:table-cell>
+ <table:table-cell table:style-name="boldheader" office:value-type="string">
+ <text:p>count</text:p>
+ </table:table-cell>
+ </table:table-row>
+EOF
+ ;
+
+ # most downloaded files first
+ for my $file (sort { $all_files_list{$b} <=> $all_files_list{$a} } keys %all_files_list) {
+ my $count = $all_files_list{$file};
+print << "EOF"
+ <table:table-row>
+ <table:table-cell office:value-type="string">
+ <text:p>$file</text:p>
+ </table:table-cell>
+ <table:table-cell office:value-type="float" office:value="$count"/>
+ </table:table-row>
+EOF
+ ;
+ }
+
+print << "EOF"
+ </table:table>
+EOF
+ ;
+
+# ---------------------------------------------------------------------------------
+
+# end of spreadsheet ...
+
+# close the elements opened by the document prologue
+print << "EOF"
+ </office:spreadsheet>
+ </office:body>
+</office:document>
+EOF
+;
commit 81bff03148f8273376da7ac3c6ded1e62a1f1696
Author: Michael Meeks <michael.meeks at suse.com>
Date: Wed Apr 3 16:42:29 2013 +0100
remove old download-stats generation.
diff --git a/scripts/download-stats.pl b/scripts/download-stats.pl
deleted file mode 100755
index 13e906c..0000000
--- a/scripts/download-stats.pl
+++ /dev/null
@@ -1,704 +0,0 @@
-#!/usr/bin/perl -w
-#
-# This script parses, and interprets the output from:
-# pg_dump -a downloadstats -f stats_counter -F plain -t stats_counter
-# on a mirrorbrain server, thus:
-# cat stats_counter | ./dlstats.pl --libo --weekify > /tmp/output.fods
-#
-# It also parses the mirrorbrain data from an Apache server ...
-# wget http://download.services.openoffice.org/stats/csv/201201.csv # etc ...
-# cat 201201.csv 201202.csv 201203.csv | dlstats.pl --csv > /tmp/output.fods
-#
-# It also parses raw sql reload dumps
-#
-use strict;
-use Date::Parse;
-use POSIX qw(strftime);
-
-# First a massive database of population statistics
-# source: the CIA:
-# https://www.cia.gov/library/publications/the-world-factbook/rankorder/2119rank.html
-my %locale_to_population = (
-# Location => Citizens # Comment
- 'cn' => 1343239923,
- 'in' => 1205073612,
- 'us' => 313847465,
- 'id' => 248645008,
- 'br' => 199321413,
- 'pk' => 190291129, # Pakistan
- 'ng' => 170123740, # Nigeria
- 'bd' => 161083804, # Bangladesh
- 'ru' => 142517670,
- 'jp' => 127368088,
- 'mx' => 114975406,
- 'ph' => 103775002,
- 'vn' => 91519289, # Vietnam
- 'eg' => 83688164, # Egypt
- 'de' => 81305856,
- 'tr' => 79749461,
- 'ir' => 78868711, # Iran
- 'th' => 67091089,
- 'fr' => 65630692,
- 'gb' => 63047162,
- 'it' => 61261254,
- 'lt' => 61261254,
- 'kr' => 48860500,
- 'za' => 48810427,
- 'es' => 47042984,
- 'co' => 45239079,
- 'ua' => 44854065,
- 'ke' => 43013341, # Kenya
- 'ar' => 42192494,
- 'pl' => 38415284,
- 'dz' => 37367226, # Algeria
- 'ca' => 34300083,
- 'ma' => 32309239, # Morocco
- 'pe' => 29549517, # Peru
- 'my' => 29179952, # Malaysia
- 've' => 28047938,
- 'sa' => 26534504, # Saudi Arabia
- 'tw' => 23234936,
- 'au' => 22015576,
- 'ro' => 21848504,
- 'lk' => 21481334, # Sri Lanka
- 'kz' => 17522010, # Kazakhstan
- 'cl' => 17067369,
- 'nl' => 16730632,
- 'ec' => 15223680, # Ecuador
- 'gt' => 14099032, # Guatemala
- 'cu' => 11075244, # Vcuba
- 'pt' => 10781459,
- 'gr' => 10767827,
- 'tn' => 10732900, # Tunisia
- 'be' => 10438353,
- 'bo' => 10290003, # Bolivia
- 'cz' => 10177300,
- 'do' => 10088598, # Dominican Republic
- 'hu' => 9958453,
- 'by' => 9643566, # Belarus
- 'se' => 9103788,
- 'hn' => 8296693, # Honduras
- 'at' => 8219743,
- 'ch' => 7925517,
- 'il' => 7590758, # Israel
- 'rs' => 7276604,
- 'hk' => 7153519,
- 'bg' => 7037935, # Bulgaria
- 'py' => 6541591, # Paraguay
- 'sv' => 6090646, # El Salvador
- 'ni' => 5727707, # Nicaragua
- 'dk' => 5543453,
- 'sk' => 5483088,
- 'sg' => 5353494, # Singapore
- 'ae' => 5314317, # United Arab Emirates
- 'fi' => 5262930,
- 'ie' => 4722028, # Republic of Ireland
- 'no' => 4707270,
- 'cr' => 4636348, # Costa Rica
- 'hr' => 4480043, # Croatia
- 'nz' => 4327944,
- 'ba' => 3879296, # Bosnia and Herzegovina
- 'pr' => 3690923, # Puerto Rico
- 'md' => 3656843, # Moldova
- 'pa' => 3510045, # Panama
- 'uy' => 3316328, # Uruguay
- 'jm' => 2889187, # Jamaica
- 'kw' => 2646314, # Kuwait
- 'lv' => 2191580, # Latvia
- 'mk' => 2082370, # Republic of Macedonia
- 'si' => 1996617, # Slovenia
- 'ee' => 1274709, # Estonia
- 'tt' => 1226383, # Republic of Trinidad and Tobago
- 'cy' => 1138071, # Cyprus
- 'lu' => 509074, # Luxembourg
- 'mt' => 409836, # Malta
- 'mq' => 403795, # Martinique
- 'is' => 313183, # Iceland
- 'nc' => 260166, # New Caledonia
- );
-
-# segment by Date, then by Product, then count
-my %data;
-my %products;
-my %prod_names;
-my %byregion;
-my %countries;
-my %byversion;
-my %allversions;
-my $total_downloads = 0;
-my $crunch_langs;
-my $top_n_countries = 10;
-my %by_date_country;
-my $weekify = 0;
-my %weekified_dates;
-
-# FIXME: ODF is -incredibly- lame in this regard ... we badly want R1C1 style referencing here [!]
-sub coltoref($)
-{
- my $col = shift;
- die ("odff needs R1C1") if ($col > 25);
- return chr (ord('A') + $col);
-}
-
-sub print_date_cell($$)
-{
- my ($style,$date) = @_;
- if (!$weekify) {
-print << "EOF"
- <table:table-cell table:style-name="$style" office:value-type="date" office:date-value="$date"/>
-EOF
-;
- } else {
-print << "EOF"
- <table:table-cell table:style-name="$style" office:value-type="string">
- <text:p>$date</text:p>
- </table:table-cell>
-EOF
-;
- }
-}
-
-my $log_format;
-my $country_match;
-for my $arg (@ARGV) {
- die "pass --csv --libo or --sql and/or --weekify and/or --country=NN" if ($arg eq '--help' || $arg eq '-h');
- if ($arg eq '--csv' || $arg eq '-c') {
- $log_format = 'c';
- } elsif ($arg eq '--libo' || $arg eq -'l') {
- $log_format = 'l';
- } elsif ($arg eq '--sql' || $arg eq -'s') {
- $log_format = 's';
- } elsif ($arg eq '--weekify') {
- $weekify = 1;
- } elsif ($arg =~ m/--country=(\S+)$/) {
- $country_match = $1;
- } else {
- die "Unknown argument '$arg'";
- }
-}
-defined $log_format || die "you must pass a format type";
-# select the format you want
-
-print STDERR "Log format: $log_format\n";
-
-# Analysing stats:
-#
-# grep for 'multi' - yields the Windows installer ... (also grep for 'all_lang') - all of them [!]
-# grep for 'Linux' and 'en-US' yields total Linux main binary downloads ...
-# grep for 'Mac' and 'en-US' yields total Mac main binary numbers ...
-
-while (<STDIN>) {
- chomp();
- my $line = $_;
- my ($id, $date, $product, $osname, $version, $lang, $country, $count);
-
- $line =~ s/[\s\r\n]*$//;
-# print STDERR "line '$line'\n";
-
- my $type;
- my $clean_product;
-
- if ($log_format eq 'l' && # a database dump from mirrorbrain:
-# 17424 2011-01-25 LibO Win-x86 3.3.0 all_lang qa 1
- $line =~ m/^\s*(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s*$/) {
- ($id, $date, $product, $osname, $version, $lang, $country, $count) = ($1, $2, $3, $4, $5, $6, $7, $8);
-
-# print "$count downloads on date $date, os $osname $lang\n";
-
- if ($date lt '2011-01-25') {
-# print STDERR "ignoring $date\n";
- next
- }
-
- $prod_names{$product} = 1;
- # Ignore Mac / Linux help packs etc.
- if (($osname =~ /Linux/ || $osname =~ /MacOS/) && $lang eq 'en-US') {
- $clean_product = $osname;
- }
- # Detect Windows distinctions
- if (($product eq 'LibO' || $product eq 'LO' || $product eq 'BrOffice') &&
- $osname =~ /Win/ && ($lang =~ /multi/ || $lang =~ /all_lang/)) {
- $clean_product = "$osname-$lang";
- }
-
- # Detect PortableOffice distinctions
- if ($product eq 'LibreOfficePortable') {
- $clean_product = "Portable";
- }
-
- # Detect DVD image
- if ($product eq 'LibO-DVD') {
- $clean_product = "DVD";
- }
-
- # Count product downloads by region
- if (defined $clean_product) {
- $type = 'product';
- } else {
-# print STDERR "lang pack line '$line' => '$lang' '$country' '$count'\n";
- $type = 'lang pack';
- }
-
- } elsif ($log_format eq 'c' &&
-# 2012-01-03,OOo,3.3.0,Linux_x86-64_langpack-deb,as,se,1
- $line =~ m/^\s*(\S+),(\S+),(\S+),(\S+),(\S+),(\S+),(\d+)\s*$/) {
- my $project;
- ($date, $project, $version, $product, $lang, $country, $count) = ($1, $2, $3, $4, $5, $6, $7);
-# ERROR - convert the product ! ... to
-# $clean_product and $type ...
- if ($project =~ m/SDK/i) {
- $product = 'sdk';
- }
- if ($product =~ m/langpack/i) {
- $type = 'lang pack';
- } else {
- $type = 'product'
- }
- $prod_names{$product} = 1;
- $clean_product = $product;
- $clean_product =~ s/_langpack//;
- $clean_product =~ s/_install//;
- $clean_product =~ s/-wJRE//;
- $clean_product =~ s/-deb//;
- $clean_product =~ s/-rpm//;
-
-# print STDERR "$count downloads of $clean_product on date $date, of $lang from $country\n";
- } elsif ($log_format eq 's') {
-# INSERT INTO `clean_downloads` (`date`, `product`, `os`, `language`, `version`, `downloads`) VALUES
-# ('2008-12-21', 'OpenOffice.org', 'winwjre', 'es', '3.0.0�', 1),
- if ( $line =~ m/^\('(\S+)',\s*'(\S+)',\s*'(\S+)',\s*'(\S+)',\s*'([^']+)',\s*(\d+)\s*\)[,;]\s*$/) {
- my $project;
- ($date, $project, $product, $country, $version, $count) = ($1, $2, $3, $4, $5, $6);
-
- if ($product =~ m/langpack/) {
- $type = 'lang pack';
- } else {
- $type = 'product';
- }
-
- if ($product =~ m/linux/i) {
- $clean_product = "linux";
- } elsif ($product =~ m/win/i) {
- $clean_product = "win";
- } elsif ($product =~ m/mac/i) {
- $clean_product = "mac";
- } elsif ($product =~ m/solar/i) {
- $clean_product = "solaris";
- } else {
- $clean_product = "other";
- }
- if ($product =~ m/64/) {
- $clean_product = $clean_product . "64";
- }
-
- if ($version =~ m/^(\d+\.\d+\.\d+)/) {
- $version = $1;
- } elsif ($version =~ m/^(\d+\.\d+)/) {
- $version = $1 . ".0";
- } elsif ($version =~ m/^(\d+)/) {
- $version = $1 . ".0.0";
- } elsif ($version =~ /^(...\d\d\d_m\d+)/) {
- $version = $1;
- } else {
-# print STDERR "invalid version: '$version'\n";
- $version = 'invalid';
- }
-
-# print STDERR "$count downloads of $product on date $date, from $country\n";
-# INSERT INTO `clean_downloads_summary` (`product`, `os`, `language`, `date`, `downloads`, `smooth_downloads`) VALUES'
-# ('BrOffice.org', 'linuxintel', 'pt-BR', '2008-10-13', 155, 155),
- } elsif ( $line =~ m/^\('(\S+)',\s*'(\S+)',\s*'(\S+)',\s*'(\S+)',\s*'([^']+)',\s*(\d+)\s*\)[,;]\s*$/) { # need new regex
- # odd - duplicate data ? ignore it ...
- } else {
-# print STDERR "malformed sql line '$line'\n";
- next;
- }
- } else {
- print STDERR "malformed line '$line'\n";
- next;
- }
-
- if (defined $country_match) {
- $country =~ m/$country_match/ || next;
- }
-
- if ($weekify) {
- if (!defined $weekified_dates{$date}) {
- my @time = gmtime (str2time($date. "T01:01:01.000001"));
- $weekified_dates{$date} = (1900 + $time[5]) . "-". POSIX::strftime("%V", @time); # 2012-1 (week number)
- }
-# print STDERR "weekify date '$date' to '".$weekified_dates{$date}."'\n";
- $date = $weekified_dates{$date};
- }
-
- # Accumulate versions by date for products
- if ($type eq 'product') {
- my $byver;
- if (!defined $byversion{$date}) {
- my %byver = ();
- $byversion{$date} = \%byver;
- }
- my $norc_ver = $version; # remove 3.4.2-1 (rc1) type versions
- $norc_ver =~ s/\-\d$//;
- $allversions{$norc_ver} = 1;
- if (!defined $byversion{$date}->{$norc_ver}) {
- $byversion{$date}->{$norc_ver} = 0;
- }
- $byversion{$date}->{$norc_ver} += $count;
-
- $by_date_country{$date}->{$country} = 0 if (!defined $by_date_country{$date}->{$country});
- $by_date_country{$date}->{$country} += $count;
- }
-
- my %hash;
- $byregion{$type} = \%hash if (!defined $byregion{$type});
- $byregion{$type}->{$country} = 0 if (!defined $byregion{$type}->{$country});
- $byregion{$type}->{$country} += $count;
- $countries{$country} = 1;
-
- if (!defined $clean_product) {
-# print "uninteresting line '$line'\n";
- next;
- }
-
- $total_downloads += $count;
-
- $products{$clean_product} = 1;
- if (!defined $data{$date}) {
- my %byproduct;
- $data{$date} = \%byproduct;
- }
- if (!defined ($data{$date}->{$clean_product})) {
- $data{$date}->{$clean_product} = 0;
- }
- $data{$date}->{$clean_product} += $count;
-# print "count for '$date' and '$clean_product' == $data{$date}->{$clean_product} [ added $count ]\n";
-}
-
-print STDERR "Dirty product names:\n";
-for my $prod (sort keys %prod_names) {
- print STDERR "\t$prod\n";
-}
-
-my @countries_by_product;
-@countries_by_product = sort { $byregion{'product'}->{$b} <=> $byregion{'product'}->{$a} } keys %countries;
-my @top_countries = @countries_by_product;
-@top_countries = splice(@top_countries, 0, $top_n_countries);
-
-# now output this as a spreadsheet ... fods ...
-print << 'EOF'
-<?xml version="1.0" encoding="UTF-8"?>
-<office:document xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0"
- xmlns:style="urn:oasis:names:tc:opendocument:xmlns:style:1.0"
- xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0"
- xmlns:table="urn:oasis:names:tc:opendocument:xmlns:table:1.0"
- xmlns:draw="urn:oasis:names:tc:opendocument:xmlns:drawing:1.0"
- xmlns:fo="urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0"
- xmlns:xlink="http://www.w3.org/1999/xlink"
- xmlns:dc="http://purl.org/dc/elements/1.1/"
- xmlns:meta="urn:oasis:names:tc:opendocument:xmlns:meta:1.0"
- xmlns:number="urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0"
- xmlns:presentation="urn:oasis:names:tc:opendocument:xmlns:presentation:1.0"
- xmlns:svg="urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0"
- xmlns:chart="urn:oasis:names:tc:opendocument:xmlns:chart:1.0"
- xmlns:dr3d="urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0"
- xmlns:math="http://www.w3.org/1998/Math/MathML"
- xmlns:form="urn:oasis:names:tc:opendocument:xmlns:form:1.0"
- xmlns:script="urn:oasis:names:tc:opendocument:xmlns:script:1.0"
- xmlns:config="urn:oasis:names:tc:opendocument:xmlns:config:1.0"
- xmlns:ooo="http://openoffice.org/2004/office"
- xmlns:ooow="http://openoffice.org/2004/writer"
- xmlns:oooc="http://openoffice.org/2004/calc"
- xmlns:dom="http://www.w3.org/2001/xml-events"
- xmlns:xforms="http://www.w3.org/2002/xforms"
- xmlns:xsd="http://www.w3.org/2001/XMLSchema"
- xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xmlns:rpt="http://openoffice.org/2005/report"
- xmlns:of="urn:oasis:names:tc:opendocument:xmlns:of:1.2"
- xmlns:xhtml="http://www.w3.org/1999/xhtml"
- xmlns:grddl="http://www.w3.org/2003/g/data-view#"
- xmlns:tableooo="http://openoffice.org/2009/table"
- xmlns:field="urn:openoffice:names:experimental:ooo-ms-interop:xmlns:field:1.0"
- xmlns:formx="urn:openoffice:names:experimental:ooxml-odf-interop:xmlns:form:1.0"
- xmlns:css3t="http://www.w3.org/TR/css3-text/"
- office:version="1.2"
- grddl:transformation="http://docs.oasis-open.org/office/1.2/xslt/odf2rdf.xsl"
- office:mimetype="application/vnd.oasis.opendocument.spreadsheet">
- <office:styles>
- <number:date-style style:name="isodatenum">
- <number:year number:style="long"/>
- <number:text>-</number:text>
- <number:month number:style="long"/>
- <number:text>-</number:text>
- <number:day number:style="long"/>
- </number:date-style>
- <style:style style:name="boldheader" style:family="table-cell" style:parent-style-name="Default">
- <style:text-properties fo:font-style="italic" fo:font-weight="bold"/>
- </style:style>
- <style:style style:name="isodate" style:family="table-cell" style:parent-style-name="Default"/> <!-- style:data-style-name="isodatenum" -->
- </office:styles>
- <office:body>
- <office:spreadsheet>
- <table:table table:name="Graphs">
- </table:table>
- <table:table table:name="ProductData">
- <table:table-row>
- <table:table-cell table:style-name="boldheader" office:value-type="string">
- <text:p>Date</text:p>
- </table:table-cell>
-EOF
-;
-my @prods = sort keys %products;
-for my $product (@prods) {
-print << "EOF"
- <table:table-cell table:style-name="boldheader" office:value-type="string">
- <text:p>$product</text:p>
- </table:table-cell>
-EOF
- ;
-}
-print << "EOF"
- <table:table-cell table:style-name="boldheader" office:value-type="string">
- <text:p>Total</text:p>
- </table:table-cell>
- </table:table-row>
-EOF
-;
-
-my $row = 1;
-
-my $colcount = @prods;
-my $colname = coltoref ($colcount);
-print STDERR "cols: $colcount - colname $colname @prods\n";
-
-for my $date (sort keys %data) {
-print << "EOF"
- <table:table-row>
-EOF
-;
- print_date_cell("isodate", $date);
- for my $product (@prods) {
- my $count = $data{$date}->{$product};
- $count = 0 if (!defined $count);
-print << "EOF"
- <table:table-cell office:value-type="float" office:value="$count"/>
-EOF
-;
- }
- $row++;
-print << "EOF"
- <table:table-cell table:formula="of:=SUM([.B$row:.$colname$row])" office:value-type="float"/>
- </table:table-row>
-EOF
-;
-}
-
-# Summary / formulae
-{
- print << "EOF"
- <table:table-row>
- <table:table-cell/>
-EOF
- ;
- my $col;
- for ($col = 1; $col <= $colcount + 1; $col++) {
- my $ref = coltoref ($col);
- print (" <table:table-cell table:formula=\"of:=SUM([.$ref"."2:.$ref$row])\" office:value-type=\"float\"/>\n");
- }
-
-print << "EOF"
- </table:table-row>
-EOF
- ;
-}
-
-# Summary as %ages ...
-
-{
- print << "EOF"
- <table:table-row>
- <table:table-cell/>
-EOF
- ;
- my $col;
- $row++;
- my $totalref = coltoref($colcount + 1) . "$row";
- for ($col = 1; $col <= $colcount + 1; $col++) {
- my $ref = coltoref ($col);
- print (" <table:table-cell table:formula=\"of:=[.$ref$row]/[.$totalref]\" office:value-type=\"float\"/>\n");
- }
-
-print << "EOF"
- </table:table-row>
- </table:table>
-EOF
- ;
-}
-
-# LangData sheet
-
-print << "EOF"
- <table:table table:name="LangData">
- <table:table-row>
- <table:table-cell table:style-name="boldheader" office:value-type="string">
- <text:p>Location</text:p>
- </table:table-cell>
- <table:table-cell table:style-name="boldheader" office:value-type="string">
- <text:p>product</text:p>
- </table:table-cell>
- <table:table-cell table:style-name="boldheader" office:value-type="string">
- <text:p>lang-pack</text:p>
- </table:table-cell>
- </table:table-row>
-EOF
- ;
- for my $country (@countries_by_product) {
- my $product = 0; my $lang_pack = 0;
- $product += $byregion{'product'}->{$country} if (defined $byregion{'product'}->{$country});
- $lang_pack += $byregion{'lang pack'}->{$country} if (defined $byregion{'lang pack'}->{$country});
-print << "EOF"
- <table:table-row>
- <table:table-cell office:value-type="string"><text:p>$country</text:p></table:table-cell>
- <table:table-cell office:value-type="float" office:value="$product"/>
- <table:table-cell office:value-type="float" office:value="$lang_pack"/>
- </table:table-row>
-EOF
-;
- }
-
-print << "EOF"
- </table:table>
-EOF
-;
-
-# By version sheet
-
-# First collapse trivial / invalid versions - under 0.1% - particularly for the sql dbase
-my @todelete = ();
-my $threshold = $total_downloads / 1000;
-for my $version (keys %allversions) {
- my $total = 0;
- for my $date (keys %byversion) {
- my $count = $byversion{$date}->{$version};
- $count = 0 if(!defined $count);
- $total = $total + $count;
- }
- if ($total < $threshold) {
- print STDERR "collapsing trivial version '$version' count $total into 'invalid'\n";
- push @todelete, $version;
- for my $date (keys %byversion) {
- my $count = $byversion{$date}->{$version};
- if (defined $count) {
- if (!defined $byversion{$date}->{'invalid'}) {
- $byversion{$date}->{'invalid'} = $count;
- } else {
- $byversion{$date}->{'invalid'} += $count;
- }
- }
- }
- }
-}
-for my $version (@todelete) {
- delete $allversions{$version};
-}
-
-print << "EOF"
- <table:table table:name="Versions">
- <table:table-row>
- <table:table-cell table:style-name="boldheader" office:value-type="string">
- <text:p>Date</text:p>
- </table:table-cell>
-EOF
-;
-for my $version (sort keys %allversions) {
-print << "EOF"
- <table:table-cell table:style-name="boldheader" office:value-type="string">
- <text:p>$version</text:p>
- </table:table-cell>
-EOF
- ;
-}
-print << "EOF"
- </table:table-row>
-EOF
- ;
- for my $date (sort keys %byversion) {
-print << "EOF"
- <table:table-row>
-EOF
-;
- print_date_cell("isodate", $date);
- for my $ver (sort keys %allversions) {
- my $count = $byversion{$date}->{$ver};
- $count = 0 if(!defined $count);
-print << "EOF"
- <table:table-cell office:value-type="float" office:value="$count"/>
-EOF
-;
- }
-print << "EOF"
- </table:table-row>
-EOF
-;
- }
-
-print << "EOF"
- </table:table>
-EOF
- ;
-
-# Language product download / comparison sheet ...
-print << "EOF"
- <table:table table:name="TopDownloadLocations">
- <table:table-row>
- <table:table-cell table:style-name="boldheader" office:value-type="string">
- <text:p>Date</text:p>
- </table:table-cell>
-EOF
-;
-
-for my $lang (@top_countries) {
-print << "EOF"
- <table:table-cell table:style-name="boldheader" office:value-type="string">
- <text:p>$lang</text:p>
- </table:table-cell>
-EOF
- ;
-}
-print << "EOF"
- </table:table-row>
-EOF
-;
-
-for my $date (sort keys %data) {
-print << "EOF"
- <table:table-row>
-EOF
-;
- print_date_cell("isodate", $date);
- for my $lang (@top_countries) {
- my $count = $by_date_country{$date}->{$lang};
- $count = 0 if (!defined $count);
-print << "EOF"
- <table:table-cell office:value-type="float" office:value="$count"/>
-EOF
-;
- }
-print << "EOF"
- </table:table-row>
-EOF
-;
-}
-print << "EOF"
- </table:table>
-EOF
- ;
-
-# end of spreadsheet ...
-
-print << "EOF"
- </office:spreadsheet>
- </office:body>
-</office:document>
-EOF
-;
More information about the Libreoffice-commits
mailing list