[Clipart] OpenClipart and Debian.
Jonadab the Unsightly One
jonadab at bright.net
Fri Jan 14 22:13:42 PST 2005
"Jonadab the Unsightly One" <jonadab at bright.net> writes:
> Ultimately I think our solution would be to have these images tagged
> with relevant keywords in their metadata... We can provide a script
> for... filtering, which would take on the command-line a list of
> keywords to filter out (i.e., delete images that have them).
I have checked clipart-filter.pl into CVS (under clipart_web), but for
the benefit of those following this discussion, it is also appended
here.
Additionally, I'm wondering whether it should be added to the tools
list.
Anyway, here's my barely-tested-at-all initial version of it:
---
#!/usr/bin/perl
# -*- cperl -*-
use SVG::Metadata;
use File::Spec::Functions;
use Data::Dumper;
# ---------------------------------------------------------------------------
# Command-line handling and entry point.
# ---------------------------------------------------------------------------

# With no arguments at all there is nothing to filter on: show help and stop.
if (not @ARGV) {
    printusage();
    exit 0;
}

# Classify every argument exactly once, by its prefix:
#   --name  or  --name=value   -> %option  (value defaults to 1)
#   -keyword                   -> %filter  (keyword marks an image for purging)
#   +keyword                   -> %keep    (keyword spares an image)
# Arguments with any other shape are ignored.
#
# The prefix is removed from a copy, never from @ARGV itself.  map and grep
# alias $_ to the original list elements, so stripping the prefix in place
# used to rewrite @ARGV and let one argument be counted twice (for example
# "--+foo" became both the option "+foo" and the keep keyword "foo").
my (%option, %filter, %keep);
for my $arg (@ARGV) {
    if ($arg =~ /^--/) {
        my $spec = substr $arg, 2;
        if ($spec =~ /^([^=]+)=(.*)/) {
            $option{$1} = $2;
        } else {
            $option{$spec} = 1;
        }
    } elsif ($arg =~ /^-[^-]/) {
        $filter{ substr $arg, 1 } = 1;
    } elsif ($arg =~ /^[+]/) {
        $keep{ substr $arg, 1 } = 1;
    }
}

# --debug: show how the command line was interpreted before doing anything.
if ($option{debug}) {
    print Dumper(+{
        filter => \%filter,
        keep   => \%keep,
        option => \%option,
    }) . "\n";
}

# Start from every (non-hidden) entry of the current directory.
process(<*>);
exit 0;
# process(@paths)
#
# Sorts the given paths into plain files and directories.  Plain files are
# handed to processfile(); directories are handed to processdir() unless the
# --nosubdirs option was given.  Anything that is neither a plain file nor a
# directory is skipped with a warning.
sub process {
    my (@d, @f);
    for my $f (@_) {
        if (-d $f) {
            push @d, $f;
        } elsif (-f $f) {
            push @f, $f;
        } else {
            warn "Skipping $f";
        }
    }
    processfile(@f) if @f;
    return unless @d;
    # --nosubdirs is documented in the usage text as "process images in the
    # current directory only", so directories must not be descended into.
    if ($option{nosubdirs}) {
        if ($option{debug}) {
            warn "Not descending into $_ (--nosubdirs)\n" for @d;
        }
        return;
    }
    processdir(@d);
}
# processdir(@dirs)
#
# For each directory: list its entries (ignoring names made up only of dots,
# such as "." and ".."), prefix each entry with the directory name, and pass
# the resulting paths to process().  A directory that cannot be opened is
# reported with a warning and skipped.
sub processdir {
    foreach my $d (@_) {
        my $handle;
        unless (opendir $handle, $d) {
            warn "Cannot descend into $d: $!";
            next;
        }
        warn "Descending into $d...\n" if $option{debug};

        # Read everything and close the handle before recursing.
        my @entries = readdir $handle;
        closedir $handle;

        my @paths;
        foreach my $entry (@entries) {
            next if $entry =~ /^[.]+$/;
            push @paths, catfile($d, $entry);
        }
        process(@paths);
    }
}
# processfile(@files)
#
# Examines each file whose name ends in ".svg" (case-insensitive): parses its
# metadata with SVG::Metadata, counts how many of its keywords appear in
# %keep and in %filter, and calls purge() on it when the filter count is
# true and the keep count is not.  The filter count starts out as the value
# of the --draconianpurge option.  Other files are ignored (and mentioned
# only when the debug level is above 2).
sub processfile {
  FILE:
    foreach my $f (@_) {
        unless ($f =~ /[.]svg$/i) {
            warn "Skipping non-SVG file $f\n" if $option{debug}>2;
            next FILE;
        }

        my $meta = SVG::Metadata->new();
        my $r    = eval { $meta->parse($f) };
        unless ($r and not $@) {
            warn "Failed to parse metadata for $f\n";
            next FILE;
        }

        warn "Processing $f\n" if $option{debug} > 3;
        my $filter = $option{draconianpurge};
        my $keep;
        foreach my $k ($meta->keywords()) {
            # A keyword listed in both hashes counts as "keep" only.
            if    ($keep{$k})   { ++$keep; }
            elsif ($filter{$k}) { ++$filter; }
        }
        warn " Filter: $filter; Keep: $keep\n" if $option{debug} > 4;
        purge($f) if ($filter and not $keep);
    }
}
# purge(@svgfiles)
#
# Deletes each given SVG file together with its companion files, i.e. every
# entry of the same directory whose name starts with the SVG's name minus the
# trailing "svg" (the period is kept, so foo.svg does not imply foo_bar.*).
#
# Options honoured:
#   --promptme  ask for confirmation (via yesno) before deleting each group
#   --debug     announce each group before deleting it
#   --simulate  print what would be removed instead of unlinking
#
# Companions are found by a literal prefix comparison on the directory
# listing rather than with glob(): glob() splits its pattern on whitespace
# and interprets [ ] { } * ? in the file name, so a name such as "a b.svg"
# or "x[1].svg" used to select (and delete) unrelated files.
sub purge {
    require File::Spec;
    for my $svg (@_) {
        my ($volume, $dirpart, $name) = File::Spec->splitpath($svg);
        (my $prefix = $name) =~ s/svg$//i;

        my @f;
        if ($prefix eq '' or $prefix eq '.') {
            # A file called just ".svg" (or "svg") has no usable stem; a
            # prefix match would select every dotfile (or every file) in the
            # directory, so only the file itself is removed.
            @f = ($svg);
        } else {
            my $dirname = File::Spec->catpath($volume, $dirpart, '');
            $dirname = File::Spec->curdir() if $dirname eq '';
            my $dh;
            unless (opendir $dh, $dirname) {
                warn "Cannot read $dirname to purge $svg: $!\n";
                next;
            }
            my @names = grep { index($_, $prefix) == 0 } readdir $dh;
            closedir $dh;
            @f = map { File::Spec->catpath($volume, $dirpart, $_) } sort @names;
        }
        next unless @f;

        my $doit = 1;
        if ($option{promptme}) {
            $doit = yesno("Delete ".(join ", ", @f)."?");
        } elsif ($option{debug}) {
            warn "Deleting ".(join ", ", @f)."\n";
        }
        next unless $doit;

        for my $victim (@f) {
            if ($option{simulate}) {
                print "rm $victim \[simulated]\n";
            } else {
                unlink $victim or warn "Unable to delete $victim: $!\n";
            }
        }
    }
}
# yesno($prompt)
#
# Prints "$prompt (y/n) ==> " and reads one line from STDIN, repeating until
# the answer is usable.  Returns 1 when the line contains a "y" but no "n"
# (case-insensitive), and an empty/undef value when it contains an "n" but no
# "y".  A line with both gets the Elves quotation and another try; a line
# with neither is simply asked again.
#
# End of input on STDIN counts as "no": without that check an exhausted or
# closed STDIN never matches either letter and the question would be asked
# forever.
sub yesno {
    my ($prompt) = @_;
    local $| = 1;    # make sure the prompt appears before we block on input
    while (1) {
        print "$prompt (y/n) ==> ";
        # Read into a lexical so the caller's $_ is left alone.
        my $answer = <STDIN>;
        return unless defined $answer;

        my $yes = $answer =~ /y/i;
        my $no  = $answer =~ /n/i;
        if ($yes and $no) {
            print <<"QUOTE";
"And it is also said, Go not to the Elves for
counsel, for they will say both no and yes."
-- Frodo Baggins
QUOTE
        } elsif ($yes) {
            return 1;
        } elsif ($no) {
            return;
        }
    }
}
# printusage()
#
# Writes the help text (synopsis, examples, special options and caveats) to
# the currently selected output handle.  $0 is interpolated so the examples
# show the name the script was invoked under.
sub printusage {
    my $usage_text = <<"USAGE";
USAGE:
$0 [options]
This command will start from the current directory and descend into subdirectories,
looking for .svg files. Any that it finds it will filter (according to the options
you specify), along with any corresponding files with the same basename, such as
.png thumbnails.
Keywords specified with a preceding minus sign (hyphen) will cause images with
those keywords to be deleted, unless they also have a keyword specified on the
command-line with a plus symbol.
EXAMPLES:
$0 -nazi
Remove all images with the "nazi" keyword. (Nazi-related
images are problematic in Germany and possibly certain other
countries, so distributors putting together clipart packages
for use there should consider filtering these out.)
$0 -flag +united_nations_member +olympic_nation +international_flag
Remove images with the "flag" keyword unless they also have one
or more of the +keywords
$0 --draconianpurge +education +books +book
Remove everything not related to education or books. This should
result in a fairly small and focused collection.
SPECIAL OPTIONS:
--debug
Print extra information about what is happening.
--debug=n
Higher values of n increase the amount of extra info printed.
--draconianpurge
If this option is specified, all images will be removed unless they
have one of the keywords specified with a plus symbol.
--nosubdirs
Do not descend into subdirectories; process images in the current
directory only.
--promptme
Ask the user to confirm (y/n) each image that will be purged.
--simulate
Don't actually delete anything; just say what would be deleted.
CAVEATS:
You need at least version 0.14 of SVG::Metadata installed; older versions
will not see any keywords on the images.
This script is not well-tested yet and is designed to delete files.
Please use this script on duplicate copies only, not on the main
clipart library or the original copy of anything.
USAGE
    print $usage_text;
}
__END__
--
$;=sub{$/};@;=map{my($a,$b)=($_,$;);$;=sub{$a.$b->()}}
split//,"ten.thgirb\@badanoj$/ --";$\=$ ;-> ();print$/
More information about the clipart
mailing list