[Libreoffice-commits] core.git: bin/get-bugzilla-attachments-by-mimetype

Sun Jan 26 12:03:29 PST 2014

bin/get-bugzilla-attachments-by-mimetype |   58 ++++++++++++++++++++++++++-----
 1 file changed, 50 insertions(+), 8 deletions(-)

New commits:
commit 06e9730b0347ef6c9dffea9508d7139c230a3e92
Author: jorendc <joren.libreoffice at telenet.be>
Date:   Wed Dec 18 01:12:58 2013 +0100

    Add basic multi-threading for each mimetype
    
    Change-Id: Ife0766ddd259bb7d86a9c7bdcf3e9c2849208cf0
    Reviewed-on: https://gerrit.libreoffice.org/7123
    Reviewed-by: Markus Mohrhard <markus.mohrhard at googlemail.com>
    Tested-by: Markus Mohrhard <markus.mohrhard at googlemail.com>

diff --git a/bin/get-bugzilla-attachments-by-mimetype b/bin/get-bugzilla-attachments-by-mimetype
index 5f061c6..3480713 100755
--- a/bin/get-bugzilla-attachments-by-mimetype
+++ b/bin/get-bugzilla-attachments-by-mimetype
@@ -27,6 +27,7 @@ import re
 import os, os.path
 import stat
 import sys
+import threading, Queue
 try:
     from urllib.request import urlopen
 except:
@@ -450,14 +451,55 @@ common_noncore_mimetypes = {
     'application/pdf': 'pdf',
 }
 
-for (prefix, uri) in rss_bugzillas.items():
-    for (mimetype,extension) in mimetypes.items():
-        # It seems that bugzilla has problems returing that many results
-        # (10000 results is probably a limit set somewhere) so we always
-        # end processing the complete list.
-        if mimetype == 'text/html' and prefix == 'moz':
-                continue
-        get_through_rss_query(uri, mimetype, prefix, extension)
+class manage_threads(threading.Thread):
+    """Worker thread: runs queued rss queries until the queue stays empty."""
+    def run(self):
+        while True:
+            try:
+                # Wait up to 5 seconds for a job; an idle worker then exits
+                uri, mimetype, prefix, extension = jobs.get(True, 5)
+            except Queue.Empty:
+                break
+            try:
+                get_through_rss_query(uri, mimetype, prefix, extension)
+            except Exception as e:
+                # A failed query must neither kill the worker nor stall jobs.join()
+                print("query for " + mimetype + " in bugtracker " + prefix + " failed: " + str(e))
+            finally:
+                jobs.task_done()
+
+def generate_multi_threading():
+    """Run get_through_rss_query for every bugzilla/mimetype pair on worker threads.
+
+    For each entry of rss_bugzillas a fresh batch of max_threads workers is
+    started, one job per mimetype is queued, and the call waits for the queue
+    to drain before moving on to the next bugzilla.
+    """
+    for (prefix, uri) in rss_bugzillas.items():
+        # Initialize threads
+        for i in range(max_threads):
+            manage_threads().start()
+
+        # Create a job for every mimetype for a bugzilla
+        for (mimetype,extension) in mimetypes.items():
+
+            # It seems that bugzilla has problems returning that many results
+            # (10000 results is probably a limit set somewhere) so we always
+            # end processing the complete list.
+            if mimetype == 'text/html' and prefix == 'moz':
+                continue
+
+            # Block until a worker frees a slot; a timeout here would drop the job
+            jobs.put([uri, mimetype, prefix, extension], block=True)
+            print("successfully placed a job in the queue searching for " + mimetype + " in bugtracker " + prefix)
+
+        # Continue when all mimetypes are done for a bugzilla
+        jobs.join()
+
+max_threads = 20 # Number of threads to create, (1 = without multi-threading)
+jobs = Queue.Queue(40) # Shared job queue (at most 40 pending jobs): filled by generate_multi_threading, drained by manage_threads
+
+generate_multi_threading() # Runs all rss queries before the rpc queries below
 
 for (mimetype,extension) in mimetypes.items():
     get_through_rpc_query(redhatrpc, redhatbug, mimetype, "rhbz", extension)