[PATCH] fdo#46538 Easyhack: Add support for more bug trackers to get-bugzilla-attachments-by-mimetype

Gökçen Eraslan gokcen.eraslan at gmail.com
Sun Apr 15 06:39:12 PDT 2012


Hello,

Here is a patch adding support for the Novell, KDE (for KOffice/Calligra), GNOME (for Gnumeric) and AbiSource
(for AbiWord) bugzillas and for Launchpad to the bin/get-bugzilla-attachments-by-mimetype script, as required by easyhack#46538. However, there are two glitches, each with a workaround:

1- As stated in the code, Novell bugzilla requires users to log in in
order to get full information about the bugs, such as attachment bodies. Since
Novell bugzilla uses some proprietary login extensions, it's hard to implement the login
mechanism and use it via the xmlrpc API. python-bugzilla claims that it supports Novell
bugzilla login, but I could not make it work. As a workaround, comments containing the
"Created an attachment (id=xxxxxxx)" pattern are parsed and the attachments are downloaded
using urllib. Attachments are filtered using the mimetype info obtained from the HTTP headers.

2- For Launchpad, the launchpadlib python module is used. But currently the Launchpad API
does not support advanced searches the way bugzilla does. For example, it's not possible
to search for bugs having application/pdf attachments. So, again, a workaround is used: the code
iterates over all the bugs of the *libreoffice* source package and checks their attachments.

---
 bin/get-bugzilla-attachments-by-mimetype |  278 ++++++++++++++++++++----------
 1 file changed, 191 insertions(+), 87 deletions(-)

diff --git a/bin/get-bugzilla-attachments-by-mimetype b/bin/get-bugzilla-attachments-by-mimetype
index 0b18060..7316f0a 100755
--- a/bin/get-bugzilla-attachments-by-mimetype
+++ b/bin/get-bugzilla-attachments-by-mimetype
@@ -39,6 +39,7 @@
 import urllib
 import feedparser
 import base64
+import re
 import os, os.path
 import xmlrpclib
 from xml.dom import minidom
@@ -83,6 +84,42 @@ def get_from_bug_url_via_xml(url, mimetype, prefix, suffix):
                     break
             attachmentid += 1
 
+def get_novell_bug_via_xml(url, mimetype, prefix, suffix):
+    """Save every attachment of the Novell bug at url whose type is mimetype."""
+    # Attachment ids are scraped from "Created an attachment (id=N)" comments
+    # because attachment bodies are not available without logging in.
+    id = url.rsplit('=', 2)[1]
+    print "id is", prefix, id, suffix
+    if os.path.isfile(suffix + '/' + prefix + id + '-1.' + suffix):
+        print "assuming", id, "is up to date"
+    else:
+        print "parsing", id
+        sock = urlopen_retry(url+"&ctype=xml")
+        dom = minidom.parse(sock)
+        sock.close()
+        attachmentid = 1
+        for comment in dom.getElementsByTagName('thetext'):
+            # an empty <thetext/> element has no child node at all
+            commentText = comment.firstChild.nodeValue if comment.firstChild is not None else ""
+            match = re.search(r".*Created an attachment \(id=([0-9]+)\)", commentText)
+            if not match:
+                continue
+            realAttachmentId = match.group(1)
+            handle = urlopen_retry(novellattach + realAttachmentId)
+            if not handle:
+                print "attachment %s is not accessible" % realAttachmentId
+                continue
+            if handle.info().gettype() != mimetype:
+                print "skipping"
+                continue
+            download = suffix + '/' + prefix + id + '-' + str(attachmentid) + '.' + suffix
+            print 'downloading as', download
+            f = open(download, 'wb')
+            f.write(handle.read())
+            f.close()
+            # no break here: later attachments of the same bug are saved as -2, -3, ...
+            attachmentid += 1
+
 def get_through_rpc_query(rpcurl, showurl, mimetype, prefix, suffix):
     try:
         proxy = xmlrpclib.ServerProxy(rpcurl)
@@ -109,108 +146,164 @@ def get_through_rss_query_url(url, mimetype, prefix, suffix):
     except:
         pass
     d = feedparser.parse(url)
+
+    #Getting detailed bug information and downloading an attachment body is not possible without logging in to Novell bugzilla
+    #get_novell_bug_via_xml function is a workaround for that situation
+    get_bug_function = get_novell_bug_via_xml if prefix == "novell" else get_from_bug_url_via_xml
+
     for entry in d['entries']:
-        get_from_bug_url_via_xml(entry['id'], mimetype, prefix, suffix)
+        get_bug_function(entry['id'], mimetype, prefix, suffix)
 
 def get_through_rss_query(queryurl, mimetype, prefix, suffix):
     url = queryurl + '?query_format=advanced&field0-0-0=attachments.mimetype&type0-0-0=equals&value0-0-0=' + escape(mimetype) + '&ctype=rss'
     print 'url is', url
     get_through_rss_query_url(url, mimetype, prefix, suffix)
 
+def get_launchpad_bugs(prefix):
+    """Download attachments with a known mimetype from Ubuntu's libreoffice bugs.
+
+    Files are saved as <suffix>/<prefix><bug id>-<n>.<suffix>, where suffix is the
+    extension that the module-level mimetypes dict maps the content type to.
+    """
+    #launchpadlib python module is required to download launchpad attachments
+    from launchpadlib.launchpad import Launchpad
+
+    launchpad = Launchpad.login_anonymously("attachmentdownload", "production")
+    ubuntu = launchpad.distributions["ubuntu"]
+
+    #since searching bugs having attachments with specific mimetypes is not available in launchpad API
+    #we're iterating over all bugs of the libreoffice source package
+    libo = ubuntu.getSourcePackage(name="libreoffice")
+    libobugs = libo.getBugTasks()
+
+    for bugtask in libobugs:
+        bug = bugtask.bug
+        id = str(bug.id)
+        print "parsing ", id, "status:", bugtask.status, "title:", bug.title[:50]
+        attachmentid = 1
+        for attachment in bug.attachments:
+            handle = attachment.data.open()
+            if handle.content_type not in mimetypes:
+                continue
+            suffix = mimetypes[handle.content_type]
+            if not os.path.isdir(suffix):
+                try:
+                    os.mkdir(suffix)
+                except OSError:
+                    pass  # best effort; open() below fails loudly if the directory is missing
+            download = suffix + '/' + prefix + id + '-' + str(attachmentid) + '.' + suffix
+            if os.path.isfile(download):
+                print "assuming", id, "is up to date"
+                break
+            print 'mimetype is', handle.content_type, 'downloading as', download
+            # binary mode: attachments are office documents, not text
+            f = open(download, "wb")
+            f.write(handle.read())
+            f.close()
+            attachmentid += 1
 
 freedesktop = 'http://bugs.freedesktop.org/buglist.cgi'
+abisource = 'http://bugzilla.abisource.com/buglist.cgi' #added for abiword
+gnome = 'http://bugzilla.gnome.org/buglist.cgi' # added for gnumeric
+kde = 'http://bugs.kde.org/buglist.cgi' # added for koffice/calligra
 openoffice = 'https://issues.apache.org/ooo/buglist.cgi'
 redhatrpc = 'https://bugzilla.redhat.com/xmlrpc.cgi'
 redhatbug = 'https://bugzilla.redhat.com/show_bug.cgi?id='
-novell = 'https://bugzilla.novell.com/buglist.cgi'
 mozilla = 'https://bugzilla.mozilla.org/buglist.cgi'
 
-mimetypes = [
+#Novell Bugzilla requires users to log in in order to get details of the bugs such as attachment bodies etc.
+#As a dirty workaround, we parse comments containing "Created an attachment (id=xxxxxx)" and download attachments manually
+#python-bugzilla claims that it supports Novell bugzilla login but it's not working right now and novell bugzilla login
+#system is a nightmare
+novellattach = 'https://bugzilla.novell.com/attachment.cgi?id='
+novell = 'https://bugzilla.novell.com/buglist.cgi'
+
+mimetypes = {
 # ODF
-    ('application/vnd.oasis.opendocument.base', 'odb'),
-    ('application/vnd.oasis.opendocument.database', 'odb'),
-    ('application/vnd.oasis.opendocument.chart', 'odc'),
-    ('application/vnd.oasis.opendocument.chart-template', 'otc'),
-    ('application/vnd.oasis.opendocument.formula', 'odf'),
-    ('application/vnd.oasis.opendocument.formula-template', 'otf'),
-    ('application/vnd.oasis.opendocument.graphics', 'odg'),
-    ('application/vnd.oasis.opendocument.graphics-template', 'otg'),
-    ('application/vnd.oasis.opendocument.graphics-flat-xml', 'fodg'),
-    ('application/vnd.oasis.opendocument.presentation', 'odp'),
-    ('application/vnd.oasis.opendocument.presentation-template', 'otp'),
-    ('application/vnd.oasis.opendocument.presentation-flat-xml', 'fodp'),
-    ('application/vnd.oasis.opendocument.spreadsheet', 'ods'),
-    ('application/vnd.oasis.opendocument.spreadsheet-template', 'ots'),
-    ('application/vnd.oasis.opendocument.spreadsheet-flat-xml', 'fods'),
-    ('application/vnd.oasis.opendocument.text', 'odt'),
-    ('application/vnd.oasis.opendocument.text-flat-xml', 'fodt'),
-    ('application/vnd.oasis.opendocument.text-master', 'odm'),
-    ('application/vnd.oasis.opendocument.text-template', 'ott'),
-    ('application/vnd.oasis.opendocument.text-web', 'oth'),
+    'application/vnd.oasis.opendocument.base': 'odb',
+    'application/vnd.oasis.opendocument.database': 'odb',
+    'application/vnd.oasis.opendocument.chart': 'odc',
+    'application/vnd.oasis.opendocument.chart-template': 'otc',
+    'application/vnd.oasis.opendocument.formula': 'odf',
+    'application/vnd.oasis.opendocument.formula-template': 'otf',
+    'application/vnd.oasis.opendocument.graphics': 'odg',
+    'application/vnd.oasis.opendocument.graphics-template': 'otg',
+    'application/vnd.oasis.opendocument.graphics-flat-xml': 'fodg',
+    'application/vnd.oasis.opendocument.presentation': 'odp',
+    'application/vnd.oasis.opendocument.presentation-template': 'otp',
+    'application/vnd.oasis.opendocument.presentation-flat-xml': 'fodp',
+    'application/vnd.oasis.opendocument.spreadsheet': 'ods',
+    'application/vnd.oasis.opendocument.spreadsheet-template': 'ots',
+    'application/vnd.oasis.opendocument.spreadsheet-flat-xml': 'fods',
+    'application/vnd.oasis.opendocument.text': 'odt',
+    'application/vnd.oasis.opendocument.text-flat-xml': 'fodt',
+    'application/vnd.oasis.opendocument.text-master': 'odm',
+    'application/vnd.oasis.opendocument.text-template': 'ott',
+    'application/vnd.oasis.opendocument.text-web': 'oth',
 # OOo XML
-    ('application/vnd.sun.xml.base', 'odb'),
-    ('application/vnd.sun.xml.calc', 'sxc'),
-    ('application/vnd.sun.xml.calc.template', 'stc'),
-    ('application/vnd.sun.xml.chart', 'sxs'),
-    ('application/vnd.sun.xml.draw', 'sxd'),
-    ('application/vnd.sun.xml.draw.template', 'std'),
-    ('application/vnd.sun.xml.impress', 'sxi'),
-    ('application/vnd.sun.xml.impress.template', 'sti'),
-    ('application/vnd.sun.xml.math', 'sxm'),
-    ('application/vnd.sun.xml.writer', 'sxw'),
-    ('application/vnd.sun.xml.writer.global', 'sxg'),
-    ('application/vnd.sun.xml.writer.template', 'stw'),
-    ('application/vnd.sun.xml.writer.web', 'stw'),
+    'application/vnd.sun.xml.base': 'odb',
+    'application/vnd.sun.xml.calc': 'sxc',
+    'application/vnd.sun.xml.calc.template': 'stc',
+    'application/vnd.sun.xml.chart': 'sxs',
+    'application/vnd.sun.xml.draw': 'sxd',
+    'application/vnd.sun.xml.draw.template': 'std',
+    'application/vnd.sun.xml.impress': 'sxi',
+    'application/vnd.sun.xml.impress.template': 'sti',
+    'application/vnd.sun.xml.math': 'sxm',
+    'application/vnd.sun.xml.writer': 'sxw',
+    'application/vnd.sun.xml.writer.global': 'sxg',
+    'application/vnd.sun.xml.writer.template': 'stw',
+    'application/vnd.sun.xml.writer.web': 'stw',
 # MSO
-    ('application/rtf', 'rtf'),
-    ('text/rtf', 'rtf'),
-    ('application/msword', 'doc'),
-    ('application/vnd.ms-powerpoint', 'ppt'),
-    ('application/vnd.ms-excel', 'xls'),
-    ('application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', 'xlsx'),
-    ('application/vnd.openxmlformats-officedocument.spreadsheetml.template', 'xltx'),
-    ('application/vnd.openxmlformats-officedocument.presentationml.presentation', 'pptx'),
-    ('application/vnd.openxmlformats-officedocument.presentationml.template', 'ppotx'),
-    ('application/vnd.openxmlformats-officedocument.presentationml.slideshow', 'ppsx'),
-    ('application/vnd.openxmlformats-officedocument.wordprocessingml.document', 'docx'),
-    ('application/vnd.openxmlformats-officedocument.wordprocessingml.template', 'dotx'),
-    ('application/vnd.visio', 'vsd'),
+    'application/rtf': 'rtf',
+    'text/rtf': 'rtf',
+    'application/msword': 'doc',
+    'application/vnd.ms-powerpoint': 'ppt',
+    'application/vnd.ms-excel': 'xls',
+    'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': 'xlsx',
+    'application/vnd.openxmlformats-officedocument.spreadsheetml.template': 'xltx',
+    'application/vnd.openxmlformats-officedocument.presentationml.presentation': 'pptx',
+    'application/vnd.openxmlformats-officedocument.presentationml.template': 'ppotx',
+    'application/vnd.openxmlformats-officedocument.presentationml.slideshow': 'ppsx',
+    'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'docx',
+    'application/vnd.openxmlformats-officedocument.wordprocessingml.template': 'dotx',
+    'application/vnd.visio': 'vsd',
 # W3C
-    ('application/xhtml+xml', 'xhtml'),
-    ('application/mathml+xml', 'mml'),
-    ('text/html', 'html'),
-    ('application/docbook+xml', 'docbook'),
+    'application/xhtml+xml': 'xhtml',
+    'application/mathml+xml': 'mml',
+    'text/html': 'html',
+    'application/docbook+xml': 'docbook',
 # misc
-    ('text/spreadsheet', 'slk'),
-    ('application/pdf', 'pdf'),
-    ('application/vnd.corel-draw', 'cdr'),
-    ('application/vnd.lotus-wordpro', 'lwp'),
-    ('application/vnd.lotus-1-2-3', 'wks'),
-    ('application/vnd.wordperfect', 'wpd'),
-    ('application/vnd.ms-works', 'wps'),
-    ('application/x-hwp', 'hwp'),
-    ('application/x-aportisdoc', 'pdb'),
-    ('application/x-pocket-word', 'psw'),
-    ('application/x-t602', '602'),
+    'text/spreadsheet': 'slk',
+    'application/pdf': 'pdf',
+    'application/vnd.corel-draw': 'cdr',
+    'application/vnd.lotus-wordpro': 'lwp',
+    'application/vnd.lotus-1-2-3': 'wks',
+    'application/vnd.wordperfect': 'wpd',
+    'application/vnd.ms-works': 'wps',
+    'application/x-hwp': 'hwp',
+    'application/x-aportisdoc': 'pdb',
+    'application/x-pocket-word': 'psw',
+    'application/x-t602': '602',
 # binfilter
-    ('application/x-starcalc', 'sdc'),
-    ('application/vnd.stardivision.calc', 'sdc5'),
-    ('application/x-starchart', 'sds'),
-    ('application/vnd.stardivision.chart', 'sds5'),
-    ('application/x-stardraw', 'sdd_d'),
-    ('application/vnd.stardivision.draw', 'sda5'),
-    ('application/x-starimpress', 'sdd_i'),
-    ('application/vnd.stardivision.impress', 'sdd5'),
-    ('application/vnd.stardivision.impress-packed', 'sdp5'),
-    ('application/x-starmath', 'smf'),
-    ('application/vnd.stardivision.math', 'smf5'),
-    ('application/x-starwriter', 'sdw'),
-    ('application/vnd.stardivision.writer', 'sdw5'),
-    ('application/vnd.stardivision.writer-global', 'sgl5'),
+    'application/x-starcalc': 'sdc',
+    'application/vnd.stardivision.calc': 'sdc5',
+    'application/x-starchart': 'sds',
+    'application/vnd.stardivision.chart': 'sds5',
+    'application/x-stardraw': 'sdd_d',
+    'application/vnd.stardivision.draw': 'sda5',
+    'application/x-starimpress': 'sdd_i',
+    'application/vnd.stardivision.impress': 'sdd5',
+    'application/vnd.stardivision.impress-packed': 'sdp5',
+    'application/x-starmath': 'smf',
+    'application/vnd.stardivision.math': 'smf5',
+    'application/x-starwriter': 'sdw',
+    'application/vnd.stardivision.writer': 'sdw5',
+    'application/vnd.stardivision.writer-global': 'sgl5',
 # unusual image mimetypes
-    ('image/cgm', 'cgm'),
-    ('image/x-targa', 'tga'),
-]
+    'image/cgm': 'cgm',
+    'image/x-targa': 'tga',
+}
 
 # disabled for now, this would download gigs of pngs/jpegs...
 common_image_mimetypes = [
@@ -241,16 +334,27 @@ common_image_mimetypes = [
     ('image/png', 'png'),
 ]
 
-for (mimetype,extension) in mimetypes:
+for (mimetype,extension) in mimetypes.items():
+    get_through_rss_query(novell, mimetype, "novell", extension)
+
+for (mimetype,extension) in mimetypes.items():
+    get_through_rss_query(kde, mimetype, "kde", extension)
+
+for (mimetype,extension) in mimetypes.items():
+    get_through_rss_query(gnome, mimetype, "gnome", extension)
+
+for (mimetype,extension) in mimetypes.items():
+    get_through_rss_query(abisource, mimetype, "abi", extension)
+
+for (mimetype,extension) in mimetypes.items():
     get_through_rss_query(freedesktop, mimetype, "fdo", extension)
 
-for (mimetype,extension) in mimetypes:
+for (mimetype,extension) in mimetypes.items():
     get_through_rpc_query(redhatrpc, redhatbug, mimetype, "rhbz", extension)
 
-#to-do, get attachments some other way, not inline in xml
-#get_through_rss_query(novell, 'application/msword', "n", "doc")
-
-for (mimetype,extension) in mimetypes:
+for (mimetype,extension) in mimetypes.items():
     get_through_rss_query(openoffice, mimetype, "ooo", extension)
 
+get_launchpad_bugs("lp")
+
 # vim:set shiftwidth=4 softtabstop=4 expandtab:
-- 
1.7.10



More information about the LibreOffice mailing list