[Libreoffice-commits] core.git: 4 commits - bin/get-bugzilla-attachments-by-mimetype

Michael Stahl mstahl at redhat.com
Mon Nov 11 07:48:03 PST 2013


 bin/get-bugzilla-attachments-by-mimetype |  102 ++++++++++++++++++-------------
 1 file changed, 62 insertions(+), 40 deletions(-)

New commits:
commit d4fd1c0c38179967166f64342d216e46d7f4d6d3
Author: Michael Stahl <mstahl at redhat.com>
Date:   Mon Nov 11 13:44:36 2013 +0100

    get-bugzilla-attachments-by-mimetype: more launchpad fixes
    
    - look at more interesting packages on launchpad, not just libreoffice.
    - the searchTasks method by default does not return closed tasks,
      and there does not appear to be a documented wild card search,
      so stupidly enumerate all possible statuses.
    
    Change-Id: I51691506874722a1d8eea4755513edf50164cf9d

diff --git a/bin/get-bugzilla-attachments-by-mimetype b/bin/get-bugzilla-attachments-by-mimetype
index 05a24e7..6655ea8 100755
--- a/bin/get-bugzilla-attachments-by-mimetype
+++ b/bin/get-bugzilla-attachments-by-mimetype
@@ -182,40 +182,41 @@ def get_launchpad_bugs(prefix):
     ubuntu = launchpad.distributions["ubuntu"]
 
     #since searching bugs having attachments with specific mimetypes is not available in launchpad API
-    #we're iterating over all bugs of the libreoffice source package
-    libo = ubuntu.getSourcePackage(name="libreoffice")
-    libobugs = libo.searchTasks()
-
-    for bugtask in libobugs:
-        bug = bugtask.bug
-        id = str(bug.id)
-        print("parsing " + id + " status: " + bugtask.status + " title: " + bug.title[:50])
-        attachmentid = 0
-        for attachment in bug.attachments:
-            attachmentid += 1
-            handle = attachment.data.open()
-            if not handle.content_type in mimetypes:
-                #print "skipping"
-                continue
-
-            suffix = mimetypes[handle.content_type]
-            if not os.path.isdir(suffix):
-                try:
-                    os.mkdir(suffix)
-                except:
-                    pass
-
-            download = suffix + '/' + prefix + id + '-' + str(attachmentid) + '.' + suffix
-
-            if os.path.isfile(download):
-                print("assuming " + id + " is up to date")
-                break
+    #we're iterating over all bugs of the most interesting source packages
+    for pkg in ["libreoffice", "openoffice.org", "abiword", "gnumeric", "koffice", "calligra"]:
+        srcpkg = ubuntu.getSourcePackage(name=pkg)
+        pkgbugs = srcpkg.searchTasks(status=["New", "Fix Committed", "Invalid", "Won't Fix", "Confirmed", "Triaged", "In Progress", "Incomplete", "Incomplete (with response)", "Incomplete (without response)", "Fix Released", "Opinion", "Expired"])
+
+        for bugtask in pkgbugs:
+            bug = bugtask.bug
+            id = str(bug.id)
+            print("parsing " + id + " status: " + bugtask.status + " title: " + bug.title[:50])
+            attachmentid = 0
+            for attachment in bug.attachments:
+                attachmentid += 1
+                handle = attachment.data.open()
+                if not handle.content_type in mimetypes:
+                    #print "skipping"
+                    continue
+
+                suffix = mimetypes[handle.content_type]
+                if not os.path.isdir(suffix):
+                    try:
+                        os.mkdir(suffix)
+                    except:
+                        pass
+
+                download = suffix + '/' + prefix + id + '-' + str(attachmentid) + '.' + suffix
+
+                if os.path.isfile(download):
+                    print("assuming " + id + " is up to date")
+                    break
 
-            print('mimetype is ' + handle.content_type + ' downloading as ' + download)
+                print('mimetype is ' + handle.content_type + ' downloading as ' + download)
 
-            f = open(download, "w")
-            f.write(handle.read())
-            f.close()
+                f = open(download, "w")
+                f.write(handle.read())
+                f.close()
 
 freedesktop = 'http://bugs.freedesktop.org/buglist.cgi'
 abisource = 'http://bugzilla.abisource.com/buglist.cgi' #added for abiword
commit bad960e65f4d00315ea7c12cc00b84b26680eb9d
Author: Michael Stahl <mstahl at redhat.com>
Date:   Mon Nov 11 12:45:40 2013 +0100

    get-bugzilla-attachments-by-mimetype: better test for existing file
    
    Change-Id: I208a74d11945986d0712970999dbd33c03efe488

diff --git a/bin/get-bugzilla-attachments-by-mimetype b/bin/get-bugzilla-attachments-by-mimetype
index 4e588af..05a24e7 100755
--- a/bin/get-bugzilla-attachments-by-mimetype
+++ b/bin/get-bugzilla-attachments-by-mimetype
@@ -98,6 +98,11 @@ def get_novell_bug_via_xml(url, mimetype, prefix, suffix):
 
             attachmentid += 1
 
+            download = suffix + '/' + prefix + id + '-' + str(attachmentid) + '.' + suffix
+            if os.path.isfile(download):
+                print("assuming " + download + " is up to date")
+                continue
+
             realAttachmentId = match.group(1)
             handle = urlopen_retry(novellattach + realAttachmentId)
             if not handle:
@@ -115,7 +120,6 @@ def get_novell_bug_via_xml(url, mimetype, prefix, suffix):
                 print("skipping")
                 continue
 
-            download = suffix + '/' + prefix + id + '-' + str(attachmentid) + '.' + suffix
             print('downloading as ' + download)
             f = open(download, 'wb')
             f.write(handle.read())
commit 3e9d164a06d60e756dffad4dd18795796348e97e
Author: Michael Stahl <mstahl at redhat.com>
Date:   Sun Nov 10 19:58:58 2013 +0100

    get-bugzilla-attachments-by-mimetype: add some missing mime types
    
    ... which are officially registered on
    https://www.iana.org/assignments/media-types/application
    
    ... plus some more non-standard ones for FreeHand, Keynote, ClarisWorks.
    
    For Apple Keynote there are 2 different ones that appear to be widely
    used.
    
    Change-Id: I26d4a85733a744188cc87a78fdba0d9d3f44da96

diff --git a/bin/get-bugzilla-attachments-by-mimetype b/bin/get-bugzilla-attachments-by-mimetype
index 15864cf..4e588af 100755
--- a/bin/get-bugzilla-attachments-by-mimetype
+++ b/bin/get-bugzilla-attachments-by-mimetype
@@ -271,11 +271,21 @@ mimetypes = {
     'application/msword': 'doc',
     'application/vnd.ms-powerpoint': 'ppt',
     'application/vnd.ms-excel': 'xls',
+    'application/vnd.ms-excel.sheet.binary.macroEnabled.12': 'xlsb',
+    'application/vnd.ms-excel.sheet.macroEnabled.12': 'xlsm',
+    'application/vnd.ms-excel.template.macroEnabled.12': 'xltm',
+    'application/vnd.ms-powerpoint.presentation.macroEnabled.12': 'pptm',
+    'application/vnd.ms-powerpoint.slide.macroEnabled.12': 'sldm',
+    'application/vnd.ms-powerpoint.slideshow.macroEnabled.12': 'ppsm',
+    'application/vnd.ms-powerpoint.template.macroEnabled.12': 'potm',
+    'application/vnd.ms-word.document.macroEnabled.12': 'docm',
+    'application/vnd.ms-word.template.macroEnabled.12': 'dotm',
     'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': 'xlsx',
     'application/vnd.openxmlformats-officedocument.spreadsheetml.template': 'xltx',
     'application/vnd.openxmlformats-officedocument.presentationml.presentation': 'pptx',
     'application/vnd.openxmlformats-officedocument.presentationml.template': 'ppotx',
     'application/vnd.openxmlformats-officedocument.presentationml.slideshow': 'ppsx',
+    'application/vnd.openxmlformats-officedocument.presentationml.slide': 'sldx',
     'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'docx',
     'application/vnd.openxmlformats-officedocument.wordprocessingml.template': 'dotx',
     'application/vnd.visio': 'vsd',
@@ -287,6 +297,7 @@ mimetypes = {
     'text/html': 'html',
     'application/docbook+xml': 'docbook',
 # misc
+    'text/csv': 'csv',
     'text/spreadsheet': 'slk',
     'application/vnd.corel-draw': 'cdr',
     'application/vnd.lotus-wordpro': 'lwp',
@@ -294,6 +305,10 @@ mimetypes = {
     'application/vnd.wordperfect': 'wpd',
     'application/wordperfect5.1': 'wpd',
     'application/vnd.ms-works': 'wps',
+    'application/clarisworks' : 'cwk',
+    'application/macwriteii' : 'mw',
+    'application/vnd.apple.keynote': 'key',
+    'application/x-iwork-keynote-sffkey': 'key',
     'application/x-hwp': 'hwp',
     'application/x-aportisdoc': 'pdb',
     'application/x-pocket-word': 'psw',
@@ -314,6 +329,7 @@ mimetypes = {
     'application/vnd.stardivision.writer': 'sdw5',
     'application/vnd.stardivision.writer-global': 'sgl5',
 # relatively uncommon image mimetypes
+    'image/x-freehand': 'fh',
     'image/cgm': 'cgm',
     'image/tiff': 'tiff',
     'image/vnd.dxf': 'dxf',
commit fdb747ff8c4653d3e94192693f1080398ae20339
Author: Michael Stahl <mstahl at redhat.com>
Date:   Sun Nov 10 19:17:17 2013 +0100

    get-bugzilla-attachments-by-mimetype: more Python 3 in exception handler
    
    ... and also fix the print functions that shouldn't output a newline.
    
    Change-Id: Ifd866cb33b3ef9a2e83625ed03d5cb836c1ba56b

diff --git a/bin/get-bugzilla-attachments-by-mimetype b/bin/get-bugzilla-attachments-by-mimetype
index e3fb177..15864cf 100755
--- a/bin/get-bugzilla-attachments-by-mimetype
+++ b/bin/get-bugzilla-attachments-by-mimetype
@@ -18,6 +18,7 @@
 #
 #where X is the n'th attachment of that type in the bug
 
+from __future__ import print_function
 import feedparser
 import base64
 import re
@@ -40,7 +41,7 @@ def urlopen_retry(url):
         try:
             return urlopen(url)
         except IOError as e:
-            print("caught IOError: " + e)
+            print("caught IOError: " + str(e))
             if maxretries == i:
                 raise
             print("retrying...")
@@ -51,17 +52,17 @@ def get_from_bug_url_via_xml(url, mimetype, prefix, suffix):
     if os.path.isfile(suffix + '/' + prefix + id + '-1.' + suffix):
         print("assuming " + id + " is up to date")
     else:
-        print("parsing", id)
+        print("parsing " + id)
         sock = urlopen_retry(url+"&ctype=xml")
         dom = minidom.parse(sock)
         sock.close()
         attachmentid=0
         for attachment in dom.getElementsByTagName('attachment'):
             attachmentid += 1
-            print(" mimetype is")
+            print(" mimetype is", end=' ')
             for node in attachment.childNodes:
                 if node.nodeName == 'type':
-                    print(node.firstChild.nodeValue)
+                    print(node.firstChild.nodeValue, end=' ')
                     if node.firstChild.nodeValue.lower() != mimetype.lower():
                         print('skipping')
                         break
@@ -102,14 +103,14 @@ def get_novell_bug_via_xml(url, mimetype, prefix, suffix):
             if not handle:
                 print("attachment %s is not accessible" % realAttachmentId)
                 continue
-            print(" mimetype is")
+            print(" mimetype is", end=' ')
 
             info = handle.info()
             if info.get_content_type:
                 remoteMime = info.get_content_type()
             else:
                 remoteMime = info.gettype()
-            print(remoteMime)
+            print(remoteMime, end=' ')
             if remoteMime != mimetype:
                 print("skipping")
                 continue
@@ -161,7 +162,7 @@ def get_through_rss_query_url(url, mimetype, prefix, suffix):
         except KeyboardInterrupt:
             raise # Ctrl+C should work
         except:
-            print(entry['id'] + " failed: " + sys.exc_info()[0])
+            print(entry['id'] + " failed: " + str(sys.exc_info()[0]))
             pass
 
 def get_through_rss_query(queryurl, mimetype, prefix, suffix):


More information about the Libreoffice-commits mailing list