[Libreoffice-commits] .: bin/get-bugzilla-attachments-by-mimetype

Thorsten Behrens thorsten at kemper.freedesktop.org
Tue Apr 17 00:43:00 PDT 2012


 bin/get-bugzilla-attachments-by-mimetype |  278 +++++++++++++++++++++----------
 1 file changed, 191 insertions(+), 87 deletions(-)

New commits:
commit 04101329727b355752f31684d27bd86f71b88640
Author: Gökçen Eraslan <gokcen.eraslan at gmail.com>
Date:   Sun Apr 15 16:39:12 2012 +0300

    fdo#46538 Easyhack: Add support for more bug trackers to get-bugzilla-attachments-by-mimetype
    
    This adds support for Novell, KDE (for KOffice/Calligra), GNOME (for
    Gnumeric), AbiSource (for AbiWord) bugzillas and Launchpad to
    bin/get-bugzilla-attachments-by-mimetype script as required by
    easyhack#46538. However, there are two glitches and workarounds:
    
    1- As stated in the code, Novell bugzilla requires users to log in to bugzilla in
    order to get full information about the bugs such as attachment bodies etc. Since
    Novell bugzilla uses some proprietary login extensions, it's hard to implement a login
    mechanism and use it via the xmlrpc API. python-bugzilla claims that it supports Novell
    bugzilla login but I could not make it work. As a workaround, comments containing
    "Created an attachment (id=xxxxxxx)" pattern are parsed and attachments are downloaded
    using urllib. Attachments are filtered using mimetype info obtained from http headers.
    
    2- For launchpad, the launchpadlib python module is used. But currently the launchpad api
    does not support really advanced searches like bugzilla does. For example, it's not possible
    to search for bugs having application/pdf attachments. So, again a workaround is used. The code
    iterates over all the bugs of the *libreoffice* source package and checks their attachments.

diff --git a/bin/get-bugzilla-attachments-by-mimetype b/bin/get-bugzilla-attachments-by-mimetype
index 0b18060..7316f0a 100755
--- a/bin/get-bugzilla-attachments-by-mimetype
+++ b/bin/get-bugzilla-attachments-by-mimetype
@@ -39,6 +39,7 @@
 import urllib
 import feedparser
 import base64
+import re
 import os, os.path
 import xmlrpclib
 from xml.dom import minidom
@@ -83,6 +84,42 @@ def get_from_bug_url_via_xml(url, mimetype, prefix, suffix):
                     break
             attachmentid += 1
 
+def get_novell_bug_via_xml(url, mimetype, prefix, suffix):
+    id = url.rsplit('=', 2)[1]
+    print "id is", prefix, id, suffix
+    if os.path.isfile(suffix + '/' + prefix + id + '-1.' + suffix):
+        print "assuming", id, "is up to date"
+    else:
+        print "parsing", id
+        sock = urlopen_retry(url+"&ctype=xml")
+        dom = minidom.parse(sock)
+        sock.close()
+        attachmentid=1
+        for comment in dom.getElementsByTagName('thetext'):
+            commentText = comment.firstChild.nodeValue
+            match = re.search(r".*Created an attachment \(id=([0-9]+)\)", commentText)
+            if not match:
+                continue
+
+            realAttachmentId = match.group(1)
+            handle = urlopen_retry(novellattach + realAttachmentId)
+            if not handle:
+                print "attachment %s is not accessible", realAttachmentId
+                continue 
+
+            remoteMime = handle.info().gettype()
+            if remoteMime != mimetype:
+                print "skipping"
+                continue
+
+            download = suffix + '/' + prefix + id + '-' + str(attachmentid) + '.' + suffix
+            print 'downloading as', download
+            f = open(download, 'w')
+            f.write(handle.read())
+            f.close()
+            break
+            attachmentid += 1
+
 def get_through_rpc_query(rpcurl, showurl, mimetype, prefix, suffix):
     try:
         proxy = xmlrpclib.ServerProxy(rpcurl)
@@ -109,108 +146,164 @@ def get_through_rss_query_url(url, mimetype, prefix, suffix):
     except:
         pass
     d = feedparser.parse(url)
+
+    #Getting detailed bug information and downloading an attachment body is not possible without logging in to Novell bugzilla
+    #get_novell_bug_via_xml function is a workaround for that situation
+    get_bug_function = get_novell_bug_via_xml if prefix == "novell" else get_from_bug_url_via_xml
+
     for entry in d['entries']:
-        get_from_bug_url_via_xml(entry['id'], mimetype, prefix, suffix)
+        get_bug_function(entry['id'], mimetype, prefix, suffix)
 
 def get_through_rss_query(queryurl, mimetype, prefix, suffix):
     url = queryurl + '?query_format=advanced&field0-0-0=attachments.mimetype&type0-0-0=equals&value0-0-0=' + escape(mimetype) + '&ctype=rss'
     print 'url is', url
     get_through_rss_query_url(url, mimetype, prefix, suffix)
 
+def get_launchpad_bugs(prefix):
+    #launchpadlib python module is required to download launchpad attachments
+    from launchpadlib.launchpad import Launchpad
+
+    launchpad = Launchpad.login_anonymously("attachmentdownload", "production")
+    ubuntu = launchpad.distributions["ubuntu"]
+
+    #since searching bugs having attachments with specific mimetypes is not available in launchpad API
+    #we're iterating over all bugs of the libreoffice source package
+    libo = ubuntu.getSourcePackage(name="libreoffice")
+    libobugs = libo.getBugTasks()
+
+    for bugtask in libobugs:
+        bug = bugtask.bug
+        id = str(bug.id)
+        print "parsing ", id, "status:", bugtask.status, "title:", bug.title[:50]
+        attachmentid = 1
+        for attachment in bug.attachments:
+            handle = attachment.data.open()
+            if not handle.content_type in mimetypes:
+                #print "skipping"
+                continue
+
+            suffix = mimetypes[handle.content_type]
+            if not os.path.isdir(suffix):
+                try:
+                    os.mkdir(suffix)
+                except:
+                    pass
+
+            download = suffix + '/' + prefix + id + '-' + str(attachmentid) + '.' + suffix
+
+            if os.path.isfile(download):
+                print "assuming", id, "is up to date"
+                break
+
+            print 'mimetype is', handle.content_type, 'downloading as', download
+
+            f = open(download, "w")
+            f.write(handle.read())
+            f.close()
+            attachmentid += 1
 
 freedesktop = 'http://bugs.freedesktop.org/buglist.cgi'
+abisource = 'http://bugzilla.abisource.com/buglist.cgi' #added for abiword
+gnome = 'http://bugzilla.gnome.org/buglist.cgi' # added for gnumeric
+kde = 'http://bugs.kde.org/buglist.cgi' # added for koffice/calligra
 openoffice = 'https://issues.apache.org/ooo/buglist.cgi'
 redhatrpc = 'https://bugzilla.redhat.com/xmlrpc.cgi'
 redhatbug = 'https://bugzilla.redhat.com/show_bug.cgi?id='
-novell = 'https://bugzilla.novell.com/buglist.cgi'
 mozilla = 'https://bugzilla.mozilla.org/buglist.cgi'
 
-mimetypes = [
+#Novell Bugzilla requires users to log in in order to get details of the bugs such as attachment bodies etc.
+#As a dirty workaround, we parse comments containing "Created an attachment (id=xxxxxx)" and download attachments manually
+#python-bugzilla claims that it supports Novell bugzilla login but it's not working right now and novell bugzilla login
+#system is a nightmare
+novellattach = 'https://bugzilla.novell.com/attachment.cgi?id='
+novell = 'https://bugzilla.novell.com/buglist.cgi'
+
+mimetypes = {
 # ODF
-    ('application/vnd.oasis.opendocument.base', 'odb'),
-    ('application/vnd.oasis.opendocument.database', 'odb'),
-    ('application/vnd.oasis.opendocument.chart', 'odc'),
-    ('application/vnd.oasis.opendocument.chart-template', 'otc'),
-    ('application/vnd.oasis.opendocument.formula', 'odf'),
-    ('application/vnd.oasis.opendocument.formula-template', 'otf'),
-    ('application/vnd.oasis.opendocument.graphics', 'odg'),
-    ('application/vnd.oasis.opendocument.graphics-template', 'otg'),
-    ('application/vnd.oasis.opendocument.graphics-flat-xml', 'fodg'),
-    ('application/vnd.oasis.opendocument.presentation', 'odp'),
-    ('application/vnd.oasis.opendocument.presentation-template', 'otp'),
-    ('application/vnd.oasis.opendocument.presentation-flat-xml', 'fodp'),
-    ('application/vnd.oasis.opendocument.spreadsheet', 'ods'),
-    ('application/vnd.oasis.opendocument.spreadsheet-template', 'ots'),
-    ('application/vnd.oasis.opendocument.spreadsheet-flat-xml', 'fods'),
-    ('application/vnd.oasis.opendocument.text', 'odt'),
-    ('application/vnd.oasis.opendocument.text-flat-xml', 'fodt'),
-    ('application/vnd.oasis.opendocument.text-master', 'odm'),
-    ('application/vnd.oasis.opendocument.text-template', 'ott'),
-    ('application/vnd.oasis.opendocument.text-web', 'oth'),
+    'application/vnd.oasis.opendocument.base': 'odb',
+    'application/vnd.oasis.opendocument.database': 'odb',
+    'application/vnd.oasis.opendocument.chart': 'odc',
+    'application/vnd.oasis.opendocument.chart-template': 'otc',
+    'application/vnd.oasis.opendocument.formula': 'odf',
+    'application/vnd.oasis.opendocument.formula-template': 'otf',
+    'application/vnd.oasis.opendocument.graphics': 'odg',
+    'application/vnd.oasis.opendocument.graphics-template': 'otg',
+    'application/vnd.oasis.opendocument.graphics-flat-xml': 'fodg',
+    'application/vnd.oasis.opendocument.presentation': 'odp',
+    'application/vnd.oasis.opendocument.presentation-template': 'otp',
+    'application/vnd.oasis.opendocument.presentation-flat-xml': 'fodp',
+    'application/vnd.oasis.opendocument.spreadsheet': 'ods',
+    'application/vnd.oasis.opendocument.spreadsheet-template': 'ots',
+    'application/vnd.oasis.opendocument.spreadsheet-flat-xml': 'fods',
+    'application/vnd.oasis.opendocument.text': 'odt',
+    'application/vnd.oasis.opendocument.text-flat-xml': 'fodt',
+    'application/vnd.oasis.opendocument.text-master': 'odm',
+    'application/vnd.oasis.opendocument.text-template': 'ott',
+    'application/vnd.oasis.opendocument.text-web': 'oth',
 # OOo XML
-    ('application/vnd.sun.xml.base', 'odb'),
-    ('application/vnd.sun.xml.calc', 'sxc'),
-    ('application/vnd.sun.xml.calc.template', 'stc'),
-    ('application/vnd.sun.xml.chart', 'sxs'),
-    ('application/vnd.sun.xml.draw', 'sxd'),
-    ('application/vnd.sun.xml.draw.template', 'std'),
-    ('application/vnd.sun.xml.impress', 'sxi'),
-    ('application/vnd.sun.xml.impress.template', 'sti'),
-    ('application/vnd.sun.xml.math', 'sxm'),
-    ('application/vnd.sun.xml.writer', 'sxw'),
-    ('application/vnd.sun.xml.writer.global', 'sxg'),
-    ('application/vnd.sun.xml.writer.template', 'stw'),
-    ('application/vnd.sun.xml.writer.web', 'stw'),
+    'application/vnd.sun.xml.base': 'odb',
+    'application/vnd.sun.xml.calc': 'sxc',
+    'application/vnd.sun.xml.calc.template': 'stc',
+    'application/vnd.sun.xml.chart': 'sxs',
+    'application/vnd.sun.xml.draw': 'sxd',
+    'application/vnd.sun.xml.draw.template': 'std',
+    'application/vnd.sun.xml.impress': 'sxi',
+    'application/vnd.sun.xml.impress.template': 'sti',
+    'application/vnd.sun.xml.math': 'sxm',
+    'application/vnd.sun.xml.writer': 'sxw',
+    'application/vnd.sun.xml.writer.global': 'sxg',
+    'application/vnd.sun.xml.writer.template': 'stw',
+    'application/vnd.sun.xml.writer.web': 'stw',
 # MSO
-    ('application/rtf', 'rtf'),
-    ('text/rtf', 'rtf'),
-    ('application/msword', 'doc'),
-    ('application/vnd.ms-powerpoint', 'ppt'),
-    ('application/vnd.ms-excel', 'xls'),
-    ('application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', 'xlsx'),
-    ('application/vnd.openxmlformats-officedocument.spreadsheetml.template', 'xltx'),
-    ('application/vnd.openxmlformats-officedocument.presentationml.presentation', 'pptx'),
-    ('application/vnd.openxmlformats-officedocument.presentationml.template', 'ppotx'),
-    ('application/vnd.openxmlformats-officedocument.presentationml.slideshow', 'ppsx'),
-    ('application/vnd.openxmlformats-officedocument.wordprocessingml.document', 'docx'),
-    ('application/vnd.openxmlformats-officedocument.wordprocessingml.template', 'dotx'),
-    ('application/vnd.visio', 'vsd'),
+    'application/rtf': 'rtf',
+    'text/rtf': 'rtf',
+    'application/msword': 'doc',
+    'application/vnd.ms-powerpoint': 'ppt',
+    'application/vnd.ms-excel': 'xls',
+    'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': 'xlsx',
+    'application/vnd.openxmlformats-officedocument.spreadsheetml.template': 'xltx',
+    'application/vnd.openxmlformats-officedocument.presentationml.presentation': 'pptx',
+    'application/vnd.openxmlformats-officedocument.presentationml.template': 'ppotx',
+    'application/vnd.openxmlformats-officedocument.presentationml.slideshow': 'ppsx',
+    'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'docx',
+    'application/vnd.openxmlformats-officedocument.wordprocessingml.template': 'dotx',
+    'application/vnd.visio': 'vsd',
 # W3C
-    ('application/xhtml+xml', 'xhtml'),
-    ('application/mathml+xml', 'mml'),
-    ('text/html', 'html'),
-    ('application/docbook+xml', 'docbook'),
+    'application/xhtml+xml': 'xhtml',
+    'application/mathml+xml': 'mml',
+    'text/html': 'html',
+    'application/docbook+xml': 'docbook',
 # misc
-    ('text/spreadsheet', 'slk'),
-    ('application/pdf', 'pdf'),
-    ('application/vnd.corel-draw', 'cdr'),
-    ('application/vnd.lotus-wordpro', 'lwp'),
-    ('application/vnd.lotus-1-2-3', 'wks'),
-    ('application/vnd.wordperfect', 'wpd'),
-    ('application/vnd.ms-works', 'wps'),
-    ('application/x-hwp', 'hwp'),
-    ('application/x-aportisdoc', 'pdb'),
-    ('application/x-pocket-word', 'psw'),
-    ('application/x-t602', '602'),
+    'text/spreadsheet': 'slk',
+    'application/pdf': 'pdf',
+    'application/vnd.corel-draw': 'cdr',
+    'application/vnd.lotus-wordpro': 'lwp',
+    'application/vnd.lotus-1-2-3': 'wks',
+    'application/vnd.wordperfect': 'wpd',
+    'application/vnd.ms-works': 'wps',
+    'application/x-hwp': 'hwp',
+    'application/x-aportisdoc': 'pdb',
+    'application/x-pocket-word': 'psw',
+    'application/x-t602': '602',
 # binfilter
-    ('application/x-starcalc', 'sdc'),
-    ('application/vnd.stardivision.calc', 'sdc5'),
-    ('application/x-starchart', 'sds'),
-    ('application/vnd.stardivision.chart', 'sds5'),
-    ('application/x-stardraw', 'sdd_d'),
-    ('application/vnd.stardivision.draw', 'sda5'),
-    ('application/x-starimpress', 'sdd_i'),
-    ('application/vnd.stardivision.impress', 'sdd5'),
-    ('application/vnd.stardivision.impress-packed', 'sdp5'),
-    ('application/x-starmath', 'smf'),
-    ('application/vnd.stardivision.math', 'smf5'),
-    ('application/x-starwriter', 'sdw'),
-    ('application/vnd.stardivision.writer', 'sdw5'),
-    ('application/vnd.stardivision.writer-global', 'sgl5'),
+    'application/x-starcalc': 'sdc',
+    'application/vnd.stardivision.calc': 'sdc5',
+    'application/x-starchart': 'sds',
+    'application/vnd.stardivision.chart': 'sds5',
+    'application/x-stardraw': 'sdd_d',
+    'application/vnd.stardivision.draw': 'sda5',
+    'application/x-starimpress': 'sdd_i',
+    'application/vnd.stardivision.impress': 'sdd5',
+    'application/vnd.stardivision.impress-packed': 'sdp5',
+    'application/x-starmath': 'smf',
+    'application/vnd.stardivision.math': 'smf5',
+    'application/x-starwriter': 'sdw',
+    'application/vnd.stardivision.writer': 'sdw5',
+    'application/vnd.stardivision.writer-global': 'sgl5',
 # unusual image mimetypes
-    ('image/cgm', 'cgm'),
-    ('image/x-targa', 'tga'),
-]
+    'image/cgm': 'cgm',
+    'image/x-targa': 'tga',
+}
 
 # disabled for now, this would download gigs of pngs/jpegs...
 common_image_mimetypes = [
@@ -241,16 +334,27 @@ common_image_mimetypes = [
     ('image/png', 'png'),
 ]
 
-for (mimetype,extension) in mimetypes:
+for (mimetype,extension) in mimetypes.items():
+    get_through_rss_query(novell, mimetype, "novell", extension)
+
+for (mimetype,extension) in mimetypes.items():
+    get_through_rss_query(kde, mimetype, "kde", extension)
+
+for (mimetype,extension) in mimetypes.items():
+    get_through_rss_query(gnome, mimetype, "gnome", extension)
+
+for (mimetype,extension) in mimetypes.items():
+    get_through_rss_query(abisource, mimetype, "abi", extension)
+
+for (mimetype,extension) in mimetypes.items():
     get_through_rss_query(freedesktop, mimetype, "fdo", extension)
 
-for (mimetype,extension) in mimetypes:
+for (mimetype,extension) in mimetypes.items():
     get_through_rpc_query(redhatrpc, redhatbug, mimetype, "rhbz", extension)
 
-#to-do, get attachments some other way, not inline in xml
-#get_through_rss_query(novell, 'application/msword', "n", "doc")
-
-for (mimetype,extension) in mimetypes:
+for (mimetype,extension) in mimetypes.items():
     get_through_rss_query(openoffice, mimetype, "ooo", extension)
 
+get_launchpad_bugs("lp")
+
 # vim:set shiftwidth=4 softtabstop=4 expandtab:


More information about the Libreoffice-commits mailing list