[Clipart] tag used in openclipart
Roger Martín
rg1024 at gmail.com
Wed Jan 12 02:07:29 PST 2011
2011/1/12 Francis Bond <bond at ieee.org>
>
> I am very interested in how you got the data. Could you show us how to do
> it?
>
>
I have a copy of the OCAL database (dated 12 Dec 2010), and I produced the
stats with a small, rudimentary PHP program (see the end of this message).
What do you need? An SQL table with all the tags, for example?
Of course, I can program any batch processes that are needed.
The program (sorry if you don't understand PHP):
<?php
/**
 * Tag statistics for the openclipart database.
 *
 * Reads id, filename and upload_tags from every row of ocal_files, splits
 * upload_tags on commas (after lower-casing) and prints an HTML report with:
 *   - total / distinct tag counts and the mean number of uses per distinct tag,
 *   - the number of clipart rows without tags and the total number of rows,
 *   - a random sample of up to 100 tags,
 *   - the 100 most used and the 100 least used tags,
 *   - a histogram of "tags per clipart" with one example id per bucket,
 *   - the clipart with the most tags ("the winner").
 *
 * Uses the mysqli extension; the original mysql_* functions no longer exist
 * in PHP 7 and later.
 *
 * NOTE(review): tags and filenames are echoed into the HTML without escaping.
 * That is acceptable for a private, one-off report; escape them before
 * exposing this output to other users.
 */

$db = mysqli_connect("localhost", "openclipart", "opencli") or die("no conect");
mysqli_select_db($db, "openclipart") or die("no db");

// Can be improved: the whole table is scanned and aggregated in PHP.
$rs = mysqli_query($db, "SELECT id,filename, upload_tags FROM ocal_files") or die("query failed");

$resul       = array(); // tag => number of clipart rows using it
$wordsResult = array(); // number of tags on a row => number of such rows
$example     = array(); // number of tags on a row => id of the last row seen with that count
$ntags       = 0;       // total number of (non-empty) tag occurrences
$notags      = 0;       // rows whose upload_tags is empty
$record      = 0;       // highest tag count seen so far
$thewinner   = "";      // description of the row holding $record
$total       = 0;       // rows processed

while ($row = mysqli_fetch_assoc($rs)) {
    $total++;

    // A NULL column is treated like an empty tag list.
    $rawTags = strtolower((string) $row["upload_tags"]);
    $tags    = explode(",", $rawTags);
    $words   = count($tags);

    // explode() on an empty string yields one empty element: that is "no tags".
    if ($words === 1 && $tags[0] === "") {
        $notags++;
        $words = 0;
    }

    if (!isset($wordsResult[$words])) {
        $wordsResult[$words] = 0;
    }
    $wordsResult[$words]++;
    $example[$words] = $row['id'];

    if ($words === 0) {
        continue;
    }

    foreach ($tags as $tag) {
        // Compare against "" explicitly so that the tag "0" is not discarded.
        if ($tag !== "") {
            if (!isset($resul[$tag])) {
                $resul[$tag] = 0;
            }
            $resul[$tag]++;
            $ntags++;
        }
    }

    if ($words > $record) {
        $record    = $words;
        $thewinner = "{$row['filename']} ( http://www.openclipart.org/detail/{$row['id']}), "
            . "tags:<br>" . $rawTags . "<br> with $record tags";
    }
}

mysqli_free_result($rs);
mysqli_close($db);

$distinct = count($resul);
// "Media" is the mean number of uses per distinct tag; guard the empty-table case.
$media = $distinct > 0 ? round($ntags / $distinct, 2) : 0;

echo "<p>Total tags: ", $ntags, "<br>Distinct: ", $distinct,
    "<br>Media: ", $media;
echo "<br>Clipart without tags: $notags";
echo "<br>total clipart: $total";

echo "<p>Random 100 tags<br>";
// array_rand() rejects a sample larger than the array and returns a single
// key (not an array) when asked for exactly one element.
$sampleSize = min(100, $distinct);
if ($sampleSize > 0) {
    foreach ((array) array_rand($resul, $sampleSize) as $tag) {
        echo "$tag ({$resul[$tag]}), ";
    }
}

echo "<p>top 100 tag<br>";
arsort($resul);
foreach (array_slice($resul, 0, 100, true) as $tag => $counter) {
    echo "$tag ($counter), ";
}

echo "<p>less used<br>";
asort($resul);
foreach (array_slice($resul, 0, 100, true) as $tag => $counter) {
    echo "$tag ($counter), ";
}

echo "<p>Clipart with..<br>";
ksort($wordsResult);
foreach ($wordsResult as $tag => $counter) {
    // $total is at least 1 here: $wordsResult only has entries if a row was read.
    echo "with $tag tags: $counter (", round($counter / $total * 100, 3),
        "% ) for example ", $example[$tag], "<br>";
}

echo "<p>The winner is $thewinner</p>";
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freedesktop.org/archives/clipart/attachments/20110112/14ad2671/attachment.html>
More information about the clipart
mailing list