[Libreoffice-commits] .: solenv/bin

Thorsten Behrens thorsten at kemper.freedesktop.org
Wed Feb 2 07:24:46 PST 2011


 solenv/bin/th_check.pl |  105 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 105 insertions(+)

New commits:
commit eacd806bcbe948b3ab8644eb59642b66946ba691
Author: Steve Butler <sebutler at gmail.com>
Date:   Wed Feb 2 16:22:52 2011 +0100

    Utility to scan for some faults in Thesaurus files

diff --git a/solenv/bin/th_check.pl b/solenv/bin/th_check.pl
new file mode 100644
index 0000000..04acc3c
--- /dev/null
+++ b/solenv/bin/th_check.pl
@@ -0,0 +1,105 @@
+:
+eval 'exec perl -wS $0 ${1+"$@"}'
+    if 0;    # never true for perl; a Bourne shell runs the eval line and re-execs this file under perl -w
+
+# Version: MPL 1.1 / GPLv3+ / LGPLv3+
+#
+# The contents of this file are subject to the Mozilla Public License Version
+# 1.1 (the "License"); you may not use this file except in compliance with
+# the License or as specified alternatively below. You may obtain a copy of
+# the License at http://www.mozilla.org/MPL/
+#
+# Software distributed under the License is distributed on an "AS IS" basis,
+# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+# for the specific language governing rights and limitations under the
+# License.
+#
+# The Initial Developer of the Original Code is
+#       Steven Butler <sebutler at gmail.com>
+# Portions created by the Initial Developer are Copyright (C) 2011 the
+# Initial Developer. All Rights Reserved.
+#
+# For minor contributions see the git repository.
+#
+# Alternatively, the contents of this file may be used under the terms of
+# either the GNU General Public License Version 3 or later (the "GPLv3+"), or
+# the GNU Lesser General Public License Version 3 or later (the "LGPLv3+"),
+# in which case the provisions of the GPLv3+ or the LGPLv3+ are applicable
+# instead of those above.
+
+use strict;
+
+# Scan one mythes-format thesaurus file and report structural faults.
+# Each "word|N" header line must name a word not seen earlier in the file and
+# be followed by exactly N entry lines (lines starting with "(", "-" or "|").
+# Faults are printed to STDOUT as "<file>:<line>: <message>".
+# Returns 0 when the file is clean, 1 when it is unreadable or has faults.
+sub processFile($) {
+    my ($input) = @_;
+    # Three-arg open with a lexical handle: the name is never parsed for a mode.
+    my $fh;
+    if (!open($fh, '<', $input)) {
+        print "FAIL: $input (no input found)\n";
+        return 1;
+    }
+    my $text = <$fh>;    # top line of thesaurus provides encoding (we ignore it)
+    my $line = 1;
+    my ($word, $wordLine, $expectedEntries);
+    my $actualEntries = 0;
+    my %words  = ();     # word => line number of its first definition
+    my @errors = ();
+
+    # Queue an error if the word being tallied has the wrong number of entries.
+    my $checkCount = sub {
+        my ($context) = @_;
+        return if !defined($expectedEntries) || $actualEntries == $expectedEntries;
+        push @errors, "$wordLine: $word defined to have $expectedEntries but seems to have $actualEntries ($context)\n";
+    };
+
+    # A lexical line buffer keeps the global $_ (which a caller's foreach may alias) untouched.
+    while (defined($text = <$fh>)) {
+        $line++;
+        $text =~ s/\s+$//;    # drops the line ending (LF or CRLF) and trailing blanks
+        if ($text =~ m/^([^\|]+)\|(\d+)$/) {
+            my ($tword, $texpectedEntries) = ($1, $2);
+            $checkCount->("next word ($tword) found on line $line");
+            ($word, $wordLine, $expectedEntries) = ($tword, $line, $texpectedEntries);
+            if (defined $words{$word}) {
+                push @errors, "$line: $word previously defined on $words{$word}\n";
+            } else {
+                $words{$word} = $line;
+            }
+            $actualEntries = 0;
+        } elsif ($text =~ m/^[\(\-\|]/) {
+            $actualEntries++;
+        } else {
+            push @errors, "$line: Unrecognised line format: $text\n";
+            # Reported above, but still counted so the tally is not off as well.
+            if ($text =~ m/^(interj|prep|conj)\|/) {
+                $actualEntries++;
+            }
+        }
+    }
+    close($fh);
+    # No header follows the last word, so its count has to be checked here.
+    $checkCount->("end of file reached on line $line");
+    if (scalar(@errors)) {
+        print $input, ':', join($input.':', @errors);
+        return 1;
+    }
+    else {
+        return 0;
+    }
+}
+
+# Entry point: check every file named on the command line.
+if (scalar(@ARGV) == 0) {
+    print "Usage: $0 <thesaurus .dat file>+\n";
+    print "\tscans for some common issues found in mythes format thesaurus files\n";
+    exit(1);
+}
+my $errors = 0;    # number of files that failed the check
+foreach my $file (@ARGV) {
+    $errors += processFile($file);
+}
+exit($errors > 255 ? 255 : $errors);    # exit status is 8 bits: 256 failures must not wrap to 0


More information about the Libreoffice-commits mailing list