<br><br><div class="gmail_quote">2011/1/12 Francis Bond <span dir="ltr"><<a href="mailto:bond@ieee.org" target="_blank">bond@ieee.org</a>></span><br><blockquote class="gmail_quote" style="margin: 0pt 0pt 0pt 0.8ex; border-left: 1px solid rgb(204, 204, 204); padding-left: 1ex;">
<br>
I am very interested in how you got the data. Could you show us how to do it?<br>
<br></blockquote><div><br>I have a copy of the OCAL database (dated 12 Dec 2010), and I produced the stats with a small, rudimentary PHP program (see the end of this message).<br>What do you need? An SQL table with all the tags, for example?<br><br>Of course, I can program all the batch processes.<br>
<br><br></div></div>the program (sorry if you don't understand php)<br><br><?php<br>
<br>
// Tag statistics for the ocal_files table of the openclipart database.<br>
// Prints the total and distinct tag counts, 100 random tags, the 100 most<br>
// and the 100 least used tags, how many cliparts carry each number of tags,<br>
// and the clipart with the most tags.<br>
<br>
mysql_connect("localhost","openclipart","opencli") or die("no conect");<br>
mysql_select_db("openclipart") or die("no db"); // canonical name; mysql_selectdb() is only a deprecated alias<br>
<br>
// Stop here if the query fails instead of looping over an invalid result.<br>
$rs = mysql_query("SELECT id,filename, upload_tags FROM ocal_files") or die("no query"); // can be improved<br>
<br>
$resul = array(); // tag => number of cliparts using it<br>
$wordsResult = array(); // number of tags => number of cliparts with that many tags<br>
$example = array(); // number of tags => id of one clipart with that many tags<br>
$ntags = 0; // tag occurrences over all cliparts<br>
$notags = 0; // cliparts without any tag<br>
$record = 0; // highest tag count seen so far<br>
$thewinner = "";<br>
$total = 0; // cliparts read<br>
<br>
while ($row = mysql_fetch_array($rs)) {<br>
  $total++;<br>
<br>
  // Split the comma separated list, trim each piece and drop empty pieces.<br>
  // The strict comparison keeps a tag that is literally "0".<br>
  $tags = array();<br>
  foreach (explode(",", strtolower($row["upload_tags"])) as $tag) {<br>
    $tag = trim($tag);<br>
    if ($tag !== "") {<br>
      $tags[] = $tag;<br>
    }<br>
  }<br>
  $words = count($tags); // real tags only, so it always agrees with $ntags<br>
<br>
  if (!isset($wordsResult[$words])) {<br>
    $wordsResult[$words] = 0;<br>
  }<br>
  $wordsResult[$words]++;<br>
  $example[$words] = $row['id'];<br>
<br>
  if ($words == 0) {<br>
    $notags++;<br>
    continue;<br>
  }<br>
<br>
  foreach ($tags as $tag) {<br>
    if (!isset($resul[$tag])) {<br>
      $resul[$tag] = 0;<br>
    }<br>
    $resul[$tag]++;<br>
    $ntags++;<br>
  }<br>
<br>
  if ($words > $record) {<br>
    $record = $words;<br>
    $thewinner= "{$row['filename']} (<a href="http://www.openclipart.org/detail/%7B$row%5B%27id%27%5D%7D" target="_blank">http://www.openclipart.org/detail/{$row['id']}</a>), tags:<br>".strtolower($row["upload_tags"])."<br> with $record tags";<br>
  }<br>
}<br>
<br>
$distinct = count($resul);<br>
// Guard the average: there may be no tags at all.<br>
echo "<p>Total tags: ", $ntags, "<br>Distinct: ", $distinct, <br>
  "<br>Media: ", ($distinct > 0 ? round($ntags/$distinct,2) : 0);<br>
echo "<br>Clipart without tags: $notags";<br>
echo "<br>total clipart: $total";<br>
<br>
<br>
echo "<p>Random 100 tags<br>";<br>
// array_rand() rejects a request for more keys than exist and returns a bare<br>
// key, not an array, when asked for exactly one; hence min() and the cast.<br>
$sample = min(100, $distinct);<br>
if ($sample > 0) {<br>
  foreach ((array) array_rand($resul, $sample) as $tag) {<br>
    echo "$tag ({$resul[$tag]}), ";<br>
  }<br>
}<br>
<br>
echo "<p>top 100 tag<br>";<br>
arsort($resul);<br>
// array_slice() yields at most 100 entries; the fourth argument keeps numeric tags as keys.<br>
foreach (array_slice($resul, 0, 100, true) as $tag=>$counter) {<br>
  echo "$tag ($counter), ";<br>
}<br>
<br>
<br>
echo "<p>less used<br>";<br>
asort($resul);<br>
foreach (array_slice($resul, 0, 100, true) as $tag=>$counter) {<br>
  echo "$tag ($counter), ";<br>
}<br>
<br>
<br>
echo "<p>Clipart with..<br>";<br>
ksort($wordsResult);<br>
// $wordsResult only has entries when at least one row was read, so $total is never 0 here.<br>
foreach ($wordsResult as $tag=>$counter){<br>
  echo "with $tag tags: $counter (", round($counter/$total*100,3), "% ) for example ", $example[$tag], "<br>";<br>
}<br>
echo "<p>The winner is $thewinner</p>";<br><br>