[Libreoffice-commits] core.git: Branch 'libreoffice-4-2' - 3 commits - include/oox oox/source sc/Library_scfilt.mk sc/source

Michael Stahl mstahl at redhat.com
Mon Dec 2 04:32:27 PST 2013


 include/oox/core/xmlfilterbase.hxx        |    8 -
 oox/source/core/xmlfilterbase.cxx         |  170 +++++++++++++++---------------
 sc/Library_scfilt.mk                      |    1 
 sc/source/filter/inc/sheetdatacontext.hxx |   11 +
 sc/source/filter/inc/workbookhelper.hxx   |    6 -
 sc/source/filter/oox/sheetdatacontext.cxx |    6 +
 sc/source/filter/oox/threadpool.cxx       |  166 +++++++++++++++++++++++++++++
 sc/source/filter/oox/threadpool.hxx       |   53 +++++++++
 sc/source/filter/oox/workbookfragment.cxx |   93 +++++++++++++++-
 sc/source/filter/oox/workbookhelper.cxx   |    7 +
 10 files changed, 427 insertions(+), 94 deletions(-)

New commits:
commit 5b8b567762c6573eaf35ffa6508c0354fba68f34
Author: Michael Stahl <mstahl at redhat.com>
Date:   Wed Nov 27 20:50:44 2013 +0100

    sc: no such thing as std::vector::find
    
    Change-Id: I11bb688a8e0affda9ed78a61d9aba3d244914f8c

diff --git a/sc/source/filter/oox/threadpool.cxx b/sc/source/filter/oox/threadpool.cxx
index 9de1a14..8349661 100644
--- a/sc/source/filter/oox/threadpool.cxx
+++ b/sc/source/filter/oox/threadpool.cxx
@@ -9,6 +9,9 @@
 
 #include "threadpool.hxx"
 
+#include <algorithm>
+
+
 class ThreadPool::ThreadWorker : public salhelper::Thread
 {
     ThreadPool    *mpPool;
@@ -102,7 +105,8 @@ void ThreadPool::waitUntilWorkersDone()
     {
         rtl::Reference< ThreadWorker > xWorker = maWorkers.back();
         maWorkers.pop_back();
-        assert( maWorkers.find( xWorker ) == maWorkers.end() );
+        assert(std::find(maWorkers.begin(), maWorkers.end(), xWorker)
+                == maWorkers.end());
         xWorker->signalNewWork();
         aGuard.clear();
         { // unlocked
commit 4e5ec5476cde6861ead84202b2904bd96f4c307c
Author: Michael Meeks <michael.meeks at collabora.com>
Date:   Wed Nov 27 18:11:34 2013 +0000

    sc: threaded parsing of the core data inside large XLSX files
    
    Enabled in experimental mode only, or via SC_IMPORT_THREADS=<N>, this
    allows significant parallelisation of sheet reading. I also implement
    a simple thread pool to manage that.
    
    Conflicts:
    	sc/source/filter/oox/workbookfragment.cxx
    
    Change-Id: I66c72211f2699490230e993a374c26b1892eac12

diff --git a/sc/Library_scfilt.mk b/sc/Library_scfilt.mk
index 499f873..eb0d5d2 100644
--- a/sc/Library_scfilt.mk
+++ b/sc/Library_scfilt.mk
@@ -211,6 +211,7 @@ $(eval $(call gb_Library_add_exception_objects,scfilt,\
 	sc/source/filter/oox/tablebuffer \
 	sc/source/filter/oox/tablefragment \
 	sc/source/filter/oox/themebuffer \
+	sc/source/filter/oox/threadpool \
 	sc/source/filter/oox/unitconverter \
 	sc/source/filter/oox/viewsettings \
 	sc/source/filter/oox/workbookfragment \
diff --git a/sc/source/filter/inc/sheetdatacontext.hxx b/sc/source/filter/inc/sheetdatacontext.hxx
index b492d2a..3f3e377 100644
--- a/sc/source/filter/inc/sheetdatacontext.hxx
+++ b/sc/source/filter/inc/sheetdatacontext.hxx
@@ -23,6 +23,9 @@
 #include "excelhandlers.hxx"
 #include "richstring.hxx"
 #include "sheetdatabuffer.hxx"
+#include <vcl/svapp.hxx>
+
+#define MULTI_THREAD_SHEET_PARSING 1
 
 namespace oox {
 namespace xls {
@@ -54,8 +57,16 @@ struct SheetDataContextBase
  */
 class SheetDataContext : public WorksheetContextBase, private SheetDataContextBase
 {
+    // If we are doing threaded parsing, this SheetDataContext
+    // forms the inner loop for bulk data parsing, and for the
+    // duration of this we can drop the solar mutex.
+#if MULTI_THREAD_SHEET_PARSING
+    SolarMutexReleaser aReleaser;
+#endif
+
 public:
     explicit            SheetDataContext( WorksheetFragmentBase& rFragment );
+    virtual            ~SheetDataContext();
 
 protected:
     virtual ::oox::core::ContextHandlerRef onCreateContext( sal_Int32 nElement, const AttributeList& rAttribs );
diff --git a/sc/source/filter/oox/sheetdatacontext.cxx b/sc/source/filter/oox/sheetdatacontext.cxx
index 5170234..9a0f7df 100644
--- a/sc/source/filter/oox/sheetdatacontext.cxx
+++ b/sc/source/filter/oox/sheetdatacontext.cxx
@@ -90,6 +90,12 @@ SheetDataContext::SheetDataContext( WorksheetFragmentBase& rFragment ) :
     mnRow( -1 ),
     mnCol( -1 )
 {
+    SAL_INFO( "sc.filter",  "start safe sheet data context - unlock\n" );
+}
+
+SheetDataContext::~SheetDataContext()
+{
+    SAL_INFO( "sc.filter",  "end safe sheet data context - relock\n" );
 }
 
 ContextHandlerRef SheetDataContext::onCreateContext( sal_Int32 nElement, const AttributeList& rAttribs )
diff --git a/sc/source/filter/oox/threadpool.cxx b/sc/source/filter/oox/threadpool.cxx
new file mode 100644
index 0000000..9de1a14
--- /dev/null
+++ b/sc/source/filter/oox/threadpool.cxx
@@ -0,0 +1,162 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include "threadpool.hxx"
+
+class ThreadPool::ThreadWorker : public salhelper::Thread
+{
+    ThreadPool    *mpPool;
+    osl::Condition maNewWork;
+public:
+    ThreadWorker( ThreadPool *pPool ) :
+        salhelper::Thread("sheet-import-thread-pool"),
+        mpPool( pPool ) {}
+
+    virtual void execute()
+    {
+        ThreadTask *pTask;
+        while ( ( pTask = waitForWork() ) )
+        {
+            pTask->doWork();
+            delete pTask;
+        }
+    }
+
+    ThreadTask *waitForWork()
+    {
+        ThreadTask *pRet = NULL;
+
+        osl::ResettableMutexGuard aGuard( mpPool->maGuard );
+
+        pRet = mpPool->popWork();
+
+        while( !pRet )
+        {
+            maNewWork.reset();
+
+            if( mpPool->mbTerminate )
+                break;
+
+            aGuard.clear(); // unlock
+
+            maNewWork.wait();
+
+            aGuard.reset(); // lock
+
+            pRet = mpPool->popWork();
+        }
+
+        return pRet;
+    }
+
+    //
+    // Why a condition per worker thread - you may ask.
+    //
+    // Unfortunately the Windows synchronisation API that we wrap
+    // is horribly inadequate cf.
+    //    http://www.cs.wustl.edu/~schmidt/win32-cv-1.html
+    // The existing osl::Condition API should only ever be used
+    // between one producer and one consumer thread to avoid the
+    // lost wakeup problem.
+    //
+    void signalNewWork()
+    {
+        maNewWork.set();
+    }
+};
+
+ThreadPool::ThreadPool( sal_Int32 nWorkers ) :
+    mbTerminate( false )
+{
+    for( sal_Int32 i = 0; i < nWorkers; i++ )
+        maWorkers.push_back( new ThreadWorker( this ) );
+
+    maTasksEmpty.reset();
+
+    osl::MutexGuard aGuard( maGuard );
+    for( size_t i = 0; i < maWorkers.size(); i++ )
+        maWorkers[ i ]->launch();
+}
+
+ThreadPool::~ThreadPool()
+{
+    waitUntilWorkersDone();
+}
+
+/// wait until all the workers have completed and
+/// terminate all threads
+void ThreadPool::waitUntilWorkersDone()
+{
+    waitUntilEmpty();
+
+    osl::ResettableMutexGuard aGuard( maGuard );
+    mbTerminate = true;
+
+    while( !maWorkers.empty() )
+    {
+        rtl::Reference< ThreadWorker > xWorker = maWorkers.back();
+        maWorkers.pop_back();
+        assert( maWorkers.find( xWorker ) == maWorkers.end() );
+        xWorker->signalNewWork();
+        aGuard.clear();
+        { // unlocked
+            xWorker->join();
+            xWorker.clear();
+        }
+        aGuard.reset();
+    }
+}
+
+void ThreadPool::pushTask( ThreadTask *pTask )
+{
+    osl::MutexGuard aGuard( maGuard );
+    maTasks.insert( maTasks.begin(), pTask );
+    // horrible beyond belief:
+    for( size_t i = 0; i < maWorkers.size(); i++ )
+        maWorkers[ i ]->signalNewWork();
+    maTasksEmpty.reset();
+}
+
+ThreadTask *ThreadPool::popWork()
+{
+    if( !maTasks.empty() )
+    {
+        ThreadTask *pTask = maTasks.back();
+        maTasks.pop_back();
+        return pTask;
+    }
+    else
+        maTasksEmpty.set();
+    return NULL;
+}
+
+void ThreadPool::waitUntilEmpty()
+{
+    osl::ResettableMutexGuard aGuard( maGuard );
+
+    if( maWorkers.empty() )
+    { // no threads at all -> execute the work in-line
+        ThreadTask *pTask;
+        while ( ( pTask = popWork() ) )
+        {
+            pTask->doWork();
+            delete pTask;
+        }
+        mbTerminate = true;
+    }
+    else
+    {
+        aGuard.clear();
+        maTasksEmpty.wait();
+        aGuard.reset();
+    }
+    assert( maTasks.empty() );
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sc/source/filter/oox/threadpool.hxx b/sc/source/filter/oox/threadpool.hxx
new file mode 100644
index 0000000..036534f
--- /dev/null
+++ b/sc/source/filter/oox/threadpool.hxx
@@ -0,0 +1,53 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef SC_THREADPOOL_HXX
+#define SC_THREADPOOL_HXX
+
+#include <sal/config.h>
+#include <salhelper/thread.hxx>
+#include <osl/mutex.hxx>
+#include <osl/conditn.hxx>
+#include <rtl/ref.hxx>
+#include <vector>
+
+class ThreadTask
+{
+public:
+    virtual      ~ThreadTask() {}
+    virtual void doWork() = 0;
+};
+
+/// A very basic thread pool implementation
+class ThreadPool
+{
+public:
+                ThreadPool( sal_Int32 nWorkers );
+    virtual    ~ThreadPool();
+    void        pushTask( ThreadTask *pTask /* takes ownership */ );
+    void        waitUntilEmpty();
+    void        waitUntilWorkersDone();
+
+private:
+    class ThreadWorker;
+    friend class ThreadWorker;
+
+    ThreadTask *waitForWork( osl::Condition &rNewWork );
+    ThreadTask *popWork();
+
+    osl::Mutex maGuard;
+    osl::Condition maTasksEmpty;
+    bool mbTerminate;
+    std::vector< rtl::Reference< ThreadWorker > > maWorkers;
+    std::vector< ThreadTask * >   maTasks;
+};
+
+#endif // SC_THREADPOOL_HXX
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sc/source/filter/oox/workbookfragment.cxx b/sc/source/filter/oox/workbookfragment.cxx
index e9884da..e666fa3 100644
--- a/sc/source/filter/oox/workbookfragment.cxx
+++ b/sc/source/filter/oox/workbookfragment.cxx
@@ -42,11 +42,16 @@
 #include "workbooksettings.hxx"
 #include "worksheetbuffer.hxx"
 #include "worksheetfragment.hxx"
+#include "sheetdatacontext.hxx"
+#include "threadpool.hxx"
+#include "officecfg/Office/Common.hxx"
 
 #include "document.hxx"
 #include "docsh.hxx"
 #include "calcconfig.hxx"
 
+#include <vcl/svapp.hxx>
+
 #include <oox/core/fastparser.hxx>
 #include <comphelper/processfactory.hxx>
 #include <officecfg/Office/Calc.hxx>
@@ -201,187 +206,80 @@ const RecordInfo* WorkbookFragment::getRecordInfos() const
 
 namespace {
 
-class WorkerThread;
-
 typedef std::pair<WorksheetGlobalsRef, FragmentHandlerRef> SheetFragmentHandler;
 typedef std::vector<SheetFragmentHandler> SheetFragmentVector;
-typedef rtl::Reference<WorkerThread> WorkerThreadRef;
-
-struct WorkerThreadData
-{
-    osl::Mutex maMtx;
-    std::vector<WorkerThreadRef> maThreads;
-};
 
-struct IdleWorkerThreadData
-{
-    osl::Mutex maMtx;
-    osl::Condition maCondAdded;
-    std::queue<WorkerThread*> maThreads;
-};
-
-struct
-{
-    boost::scoped_ptr<WorkerThreadData> mpWorkerThreads;
-    boost::scoped_ptr<IdleWorkerThreadData> mpIdleThreads;
-
-} aThreadGlobals;
-
-enum WorkerAction
-{
-    None = 0,
-    TerminateThread,
-    Work
-};
-
-class WorkerThread : public salhelper::Thread
+class WorkerThread : public ThreadTask
 {
     WorkbookFragment& mrWorkbookHandler;
-    size_t mnID;
-    FragmentHandlerRef mxHandler;
-    boost::scoped_ptr<oox::core::FastParser> mxParser;
-    osl::Mutex maMtxAction;
-    osl::Condition maCondActionChanged;
-    WorkerAction meAction;
-public:
-    WorkerThread( WorkbookFragment& rWorkbookHandler, size_t nID ) :
-        salhelper::Thread("sheet-import-worker-thread"),
-        mrWorkbookHandler(rWorkbookHandler),
-        mnID(nID),
-        mxParser(rWorkbookHandler.getOoxFilter().createParser()),
-        meAction(None) {}
-
-    virtual void execute()
-    {
-        announceIdle();
+    rtl::Reference<FragmentHandler> mxHandler;
 
-        // Keep looping until the terminate request is set.
-        for (maCondActionChanged.wait(); true; maCondActionChanged.wait())
-        {
-            osl::MutexGuard aGuard(maMtxAction);
-            if (!maCondActionChanged.check())
-                // Wait again.
-                continue;
-
-            maCondActionChanged.reset();
-
-            if (meAction == TerminateThread)
-                // End the thread.
-                return;
-
-            if (meAction != Work)
-                continue;
-
-#if 0
-            // TODO : This still deadlocks in the fast parser code.
-            mrWorkbookHandler.importOoxFragment(mxHandler, *mxParser);
-#else
-            double val = rand() / static_cast<double>(RAND_MAX);
-            val *= 1000000; // normalize to 1 second.
-            val *= 1.5; // inflate it a bit.
-            usleep(val); // pretend to be working while asleep.
-#endif
-            announceIdle();
-        }
-    }
-
-    void announceIdle()
-    {
-        // Set itself idle to receive a new task from the main thread.
-        osl::MutexGuard aGuard(aThreadGlobals.mpIdleThreads->maMtx);
-        aThreadGlobals.mpIdleThreads->maThreads.push(this);
-        aThreadGlobals.mpIdleThreads->maCondAdded.set();
-    }
-
-    void terminate()
+public:
+    WorkerThread( WorkbookFragment& rWorkbookHandler,
+                  const rtl::Reference<FragmentHandler>& xHandler ) :
+        mrWorkbookHandler( rWorkbookHandler ),
+        mxHandler( xHandler )
     {
-        osl::MutexGuard aGuard(maMtxAction);
-        meAction = TerminateThread;
-        maCondActionChanged.set();
     }
 
-    void assign( const FragmentHandlerRef& rHandler )
+    virtual void doWork()
     {
-        osl::MutexGuard aGuard(maMtxAction);
-        mxHandler = rHandler;
-        meAction = Work;
-        maCondActionChanged.set();
+        // We hold the solar mutex in all threads except for
+        // the small safe section of the inner loop in
+        // sheetdatacontext.cxx
+        SAL_INFO( "sc.filter",  "start wait on solar\n" );
+        SolarMutexGuard maGuard;
+        SAL_INFO( "sc.filter",  "got solar\n" );
+
+        boost::scoped_ptr<oox::core::FastParser> xParser(
+                mrWorkbookHandler.getOoxFilter().createParser() );
+
+        SAL_INFO( "sc.filter",  "start import\n" );
+        mrWorkbookHandler.importOoxFragment( mxHandler, *xParser );
+        SAL_INFO( "sc.filter",  "end import, release solar\n" );
     }
 };
 
 void importSheetFragments( WorkbookFragment& rWorkbookHandler, SheetFragmentVector& rSheets )
 {
-#if 0 // threaded version
-    size_t nThreadCount = 3;
-    if (nThreadCount > rSheets.size())
-        nThreadCount = rSheets.size();
+    sal_Int32 nThreads = std::min( rSheets.size(), (size_t) 4 /* FIXME: ncpus/2 */ );
 
-    // Create new thread globals.
-    aThreadGlobals.mpWorkerThreads.reset(new WorkerThreadData);
-    aThreadGlobals.mpIdleThreads.reset(new IdleWorkerThreadData);
+    Reference< XComponentContext > xContext = comphelper::getProcessComponentContext();
 
-    SheetFragmentVector::iterator it = rSheets.begin(), itEnd = rSheets.end();
+    // Force threading off unless experimental mode or env. var is set.
+    if( !officecfg::Office::Common::Misc::ExperimentalMode::get( xContext ) )
+        nThreads = 0;
 
-    {
-        // Initialize worker threads.
-        osl::MutexGuard aGuard(aThreadGlobals.mpWorkerThreads->maMtx);
-        for (size_t i = 0; i < nThreadCount; ++i)
-        {
-            WorkerThreadRef pThread(new WorkerThread(rWorkbookHandler, i));
-            aThreadGlobals.mpWorkerThreads->maThreads.push_back(pThread);
-            pThread->launch();
-        }
-    }
+    const char *pEnv;
+    if( ( pEnv = getenv( "SC_IMPORT_THREADS" ) ) )
+        nThreads = rtl_str_toInt32( pEnv, 10 );
 
-    for (aThreadGlobals.mpIdleThreads->maCondAdded.wait(); true; aThreadGlobals.mpIdleThreads->maCondAdded.wait())
+    if( nThreads != 0 )
     {
-        osl::MutexGuard aGuard(aThreadGlobals.mpIdleThreads->maMtx);
-        if (!aThreadGlobals.mpIdleThreads->maCondAdded.check())
-            // Wait again.
-            continue;
+        // test sequential read in this mode
+        if( nThreads < 0)
+            nThreads = 0;
+        ThreadPool aPool( nThreads );
 
-        aThreadGlobals.mpIdleThreads->maCondAdded.reset();
+        SheetFragmentVector::iterator it = rSheets.begin(), itEnd = rSheets.end();
+        for( ; it != itEnd; ++it )
+            aPool.pushTask( new WorkerThread( rWorkbookHandler, it->second ) )
+                ;
 
-        // Assign work to all idle threads.
-        while (!aThreadGlobals.mpIdleThreads->maThreads.empty())
         {
-            if (it == itEnd)
-                break;
-
-            WorkerThread* p = aThreadGlobals.mpIdleThreads->maThreads.front();
-            aThreadGlobals.mpIdleThreads->maThreads.pop();
-            p->assign(it->second);
-            ++it;
+            // Ideally no-one else but our worker threads can re-acquire that.
+            // potentially if that causes a problem we might want to extend
+            // the SolarMutex functionality to allow passing it around.
+            SolarMutexReleaser aReleaser;
+            aPool.waitUntilWorkersDone();
         }
-
-        if (it == itEnd)
-            // Finished!  Exit the loop.
-            break;
     }
-
-    {
-        // Terminate all worker threads.
-        osl::MutexGuard aGuard(aThreadGlobals.mpWorkerThreads->maMtx);
-        for (size_t i = 0, n = aThreadGlobals.mpWorkerThreads->maThreads.size(); i < n; ++i)
-        {
-            WorkerThreadRef pWorker = aThreadGlobals.mpWorkerThreads->maThreads[i];
-            pWorker->terminate();
-            if (pWorker.is())
-                pWorker->join();
-        }
-    }
-
-    // Delete all thread globals.
-    aThreadGlobals.mpWorkerThreads.reset();
-    aThreadGlobals.mpIdleThreads.reset();
-
-#else // non-threaded version
-    for( SheetFragmentVector::iterator it = rSheets.begin(), itEnd = rSheets.end(); it != itEnd; ++it)
+    else
     {
-        // import the sheet fragment
-        rWorkbookHandler.importOoxFragment(it->second);
+        SheetFragmentVector::iterator it = rSheets.begin(), itEnd = rSheets.end();
+        for( ; it != itEnd; ++it )
+            rWorkbookHandler.importOoxFragment( it->second );
     }
-#endif
 }
 
 }
commit 934941a4cf7c9ee7df69f03e6f0be246499d587f
Author: Kohei Yoshida <kohei.yoshida at collabora.com>
Date:   Fri Nov 22 20:57:40 2013 -0500

    Allow worker threads to use their own FastParser instances.
    
    To prevent deadlock during threaded sheet stream parsing.  It now
    deadlocks at a different place.
    
    Conflicts:
    	oox/source/core/xmlfilterbase.cxx
    	sc/source/filter/oox/workbookfragment.cxx
    
    Change-Id: I0ba0f2c9a257e71b0a340ab14e369b06d5fd8829

diff --git a/include/oox/core/xmlfilterbase.hxx b/include/oox/core/xmlfilterbase.hxx
index 87234fb..76eb091 100644
--- a/include/oox/core/xmlfilterbase.hxx
+++ b/include/oox/core/xmlfilterbase.hxx
@@ -56,8 +56,7 @@ namespace oox {
 namespace core {
 
 class FragmentHandler;
-
-// ============================================================================
+class FastParser;
 
 struct TextField {
     com::sun::star::uno::Reference< com::sun::star::text::XText > xText;
@@ -107,7 +106,8 @@ public:
 
         @return  True, if the fragment could be imported.
      */
-    bool                importFragment( const ::rtl::Reference< FragmentHandler >& rxHandler );
+    bool importFragment( const rtl::Reference<FragmentHandler>& rxHandler );
+    bool importFragment( const rtl::Reference<FragmentHandler>& rxHandler, FastParser& rParser );
 
     /** Imports a fragment into an xml::dom::XDocument.
 
@@ -231,6 +231,8 @@ public:
 
     void importDocumentProperties();
 
+    FastParser* createParser() const;
+
 protected:
     virtual ::com::sun::star::uno::Reference< ::com::sun::star::io::XInputStream >
         implGetInputStream( utl::MediaDescriptor& rMediaDesc ) const;
diff --git a/oox/source/core/xmlfilterbase.cxx b/oox/source/core/xmlfilterbase.cxx
index 98c8886..d1ae6a5 100644
--- a/oox/source/core/xmlfilterbase.cxx
+++ b/oox/source/core/xmlfilterbase.cxx
@@ -74,12 +74,6 @@ using utl::MediaDescriptor;
 using ::sax_fastparser::FSHelperPtr;
 using ::sax_fastparser::FastSerializerHelper;
 
-
-
-
-
-// ============================================================================
-
 namespace {
 
 bool lclHasSuffix( const OUString& rFragmentPath, const OUString& rSuffix )
@@ -88,9 +82,77 @@ bool lclHasSuffix( const OUString& rFragmentPath, const OUString& rSuffix )
     return (nSuffixPos >= 0) && rFragmentPath.match( rSuffix, nSuffixPos );
 }
 
-} // namespace
+struct NamespaceIds: public rtl::StaticWithInit<
+    Sequence< beans::Pair< OUString, sal_Int32 > >,
+    NamespaceIds>
+{
+    Sequence< beans::Pair< OUString, sal_Int32 > > operator()()
+    {
+        static const char* const namespaceURIs[] = {
+            "http://www.w3.org/XML/1998/namespace",
+            "http://schemas.openxmlformats.org/package/2006/relationships",
+            "http://schemas.openxmlformats.org/officeDocument/2006/relationships",
+            "http://schemas.openxmlformats.org/drawingml/2006/main",
+            "http://schemas.openxmlformats.org/drawingml/2006/diagram",
+            "http://schemas.openxmlformats.org/drawingml/2006/chart",
+            "http://schemas.openxmlformats.org/drawingml/2006/chartDrawing",
+            "urn:schemas-microsoft-com:vml",
+            "urn:schemas-microsoft-com:office:office",
+            "urn:schemas-microsoft-com:office:word",
+            "urn:schemas-microsoft-com:office:excel",
+            "urn:schemas-microsoft-com:office:powerpoint",
+            "http://schemas.microsoft.com/office/2006/activeX",
+            "http://schemas.openxmlformats.org/spreadsheetml/2006/main",
+            "http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing",
+            "http://schemas.microsoft.com/office/excel/2006/main",
+            "http://schemas.openxmlformats.org/presentationml/2006/main",
+            "http://schemas.openxmlformats.org/markup-compatibility/2006",
+            "http://schemas.openxmlformats.org/spreadsheetml/2006/main/v2",
+            "http://schemas.microsoft.com/office/drawing/2008/diagram",
+            "http://schemas.microsoft.com/office/spreadsheetml/2009/9/main"
+        };
+
+        static const sal_Int32 namespaceIds[] = {
+            NMSP_xml,
+            NMSP_packageRel,
+            NMSP_officeRel,
+            NMSP_dml,
+            NMSP_dmlDiagram,
+            NMSP_dmlChart,
+            NMSP_dmlChartDr,
+            NMSP_dmlSpreadDr,
+            NMSP_vml,
+            NMSP_vmlOffice,
+            NMSP_vmlWord,
+            NMSP_vmlExcel,
+            NMSP_vmlPowerpoint,
+            NMSP_xls,
+            NMSP_ppt,
+            NMSP_ax,
+            NMSP_xm,
+            NMSP_mce,
+            NMSP_mceTest,
+            NMSP_dsp,
+            NMSP_xlsExtLst
+        };
+
+        Sequence< beans::Pair< OUString, sal_Int32 > > aRet(STATIC_ARRAY_SIZE(namespaceIds));
+        for( sal_Int32 i=0; i<aRet.getLength(); ++i )
+            aRet[i] = make_Pair(
+                OUString::createFromAscii(namespaceURIs[i]),
+                namespaceIds[i]);
+        return aRet;
+    }
+};
 
-// ============================================================================
+void registerNamespaces( FastParser& rParser )
+{
+    const Sequence< beans::Pair<OUString, sal_Int32> > ids = NamespaceIds::get();
+    for (sal_Int32 i = 0; i < ids.getLength(); ++i)
+        rParser.registerNamespace(ids[i].Second);
+}
+
+} // namespace
 
 struct XmlFilterBaseImpl
 {
@@ -105,75 +167,6 @@ struct XmlFilterBaseImpl
     explicit            XmlFilterBaseImpl( const Reference< XComponentContext >& rxContext ) throw( RuntimeException );
 };
 
-// ----------------------------------------------------------------------------
-
-namespace
-{
-    struct NamespaceIds: public rtl::StaticWithInit<
-        Sequence< beans::Pair< OUString, sal_Int32 > >,
-        NamespaceIds>
-    {
-        Sequence< beans::Pair< OUString, sal_Int32 > > operator()()
-        {
-            static const char* const namespaceURIs[] = {
-                "http://www.w3.org/XML/1998/namespace",
-                "http://schemas.openxmlformats.org/package/2006/relationships",
-                "http://schemas.openxmlformats.org/officeDocument/2006/relationships",
-                "http://schemas.openxmlformats.org/drawingml/2006/main",
-                "http://schemas.openxmlformats.org/drawingml/2006/diagram",
-                "http://schemas.openxmlformats.org/drawingml/2006/chart",
-                "http://schemas.openxmlformats.org/drawingml/2006/chartDrawing",
-                "urn:schemas-microsoft-com:vml",
-                "urn:schemas-microsoft-com:office:office",
-                "urn:schemas-microsoft-com:office:word",
-                "urn:schemas-microsoft-com:office:excel",
-                "urn:schemas-microsoft-com:office:powerpoint",
-                "http://schemas.microsoft.com/office/2006/activeX",
-                "http://schemas.openxmlformats.org/spreadsheetml/2006/main",
-                "http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing",
-                "http://schemas.microsoft.com/office/excel/2006/main",
-                "http://schemas.openxmlformats.org/presentationml/2006/main",
-                "http://schemas.openxmlformats.org/markup-compatibility/2006",
-                "http://schemas.openxmlformats.org/spreadsheetml/2006/main/v2",
-                "http://schemas.microsoft.com/office/drawing/2008/diagram",
-                "http://schemas.microsoft.com/office/spreadsheetml/2009/9/main"
-            };
-
-            static const sal_Int32 namespaceIds[] = {
-                NMSP_xml,
-                NMSP_packageRel,
-                NMSP_officeRel,
-                NMSP_dml,
-                NMSP_dmlDiagram,
-                NMSP_dmlChart,
-                NMSP_dmlChartDr,
-                NMSP_dmlSpreadDr,
-                NMSP_vml,
-                NMSP_vmlOffice,
-                NMSP_vmlWord,
-                NMSP_vmlExcel,
-                NMSP_vmlPowerpoint,
-                NMSP_xls,
-                NMSP_ppt,
-                NMSP_ax,
-                NMSP_xm,
-                NMSP_mce,
-                NMSP_mceTest,
-                NMSP_dsp,
-                NMSP_xlsExtLst
-            };
-
-            Sequence< beans::Pair< OUString, sal_Int32 > > aRet(STATIC_ARRAY_SIZE(namespaceIds));
-            for( sal_Int32 i=0; i<aRet.getLength(); ++i )
-                aRet[i] = make_Pair(
-                    OUString::createFromAscii(namespaceURIs[i]),
-                    namespaceIds[i]);
-            return aRet;
-        }
-    };
-}
-
-// ----------------------------------------------------------------------------
 
 XmlFilterBaseImpl::XmlFilterBaseImpl( const Reference< XComponentContext >& rxContext ) throw( RuntimeException ) :
     maFastParser( rxContext ),
@@ -181,10 +174,7 @@ XmlFilterBaseImpl::XmlFilterBaseImpl( const Reference< XComponentContext >& rxCo
     maVmlSuffix( ".vml" )
 {
     // register XML namespaces
-    const Sequence< beans::Pair< OUString, sal_Int32 > > ids=
-        NamespaceIds::get();
-    for( sal_Int32 i=0; i<ids.getLength(); ++i )
-        maFastParser.registerNamespace( ids[i].Second );
+    registerNamespaces(maFastParser);
 }
 
 XmlFilterBase::XmlFilterBase( const Reference< XComponentContext >& rxContext ) throw( RuntimeException ) :
@@ -220,13 +210,25 @@ void XmlFilterBase::importDocumentProperties()
     xImporter->importProperties( xDocumentStorage, xPropSupplier->getDocumentProperties() );
 }
 
+FastParser* XmlFilterBase::createParser() const
+{
+    FastParser* pParser = new FastParser(getComponentContext());
+    registerNamespaces(*pParser);
+    return pParser;
+}
+
 OUString XmlFilterBase::getFragmentPathFromFirstType( const OUString& rType )
 {
     // importRelations() caches the relations map for subsequence calls
     return importRelations( OUString() )->getFragmentPathFromFirstType( rType );
 }
 
-bool XmlFilterBase::importFragment( const ::rtl::Reference< FragmentHandler >& rxHandler )
+bool XmlFilterBase::importFragment( const rtl::Reference<FragmentHandler>& rxHandler )
+{
+    return importFragment(rxHandler, mxImpl->maFastParser);
+}
+
+bool XmlFilterBase::importFragment( const rtl::Reference<FragmentHandler>& rxHandler, FastParser& rParser )
 {
     OSL_ENSURE( rxHandler.is(), "XmlFilterBase::importFragment - missing fragment handler" );
     if( !rxHandler.is() )
@@ -280,8 +282,8 @@ bool XmlFilterBase::importFragment( const ::rtl::Reference< FragmentHandler >& r
         // own try/catch block for showing parser failure assertion with fragment path
         if( xInStrm.is() ) try
         {
-            mxImpl->maFastParser.setDocumentHandler( xDocHandler );
-            mxImpl->maFastParser.parseStream( xInStrm, aFragmentPath );
+            rParser.setDocumentHandler(xDocHandler);
+            rParser.parseStream(xInStrm, aFragmentPath);
             return true;
         }
         catch( Exception& )
diff --git a/sc/source/filter/inc/workbookhelper.hxx b/sc/source/filter/inc/workbookhelper.hxx
index abafb20..1f472c3 100644
--- a/sc/source/filter/inc/workbookhelper.hxx
+++ b/sc/source/filter/inc/workbookhelper.hxx
@@ -53,6 +53,7 @@ namespace oox { namespace core {
     class FilterBase;
     class FragmentHandler;
     class XmlFilterBase;
+    class FastParser;
 } }
 
 class ScDocument;
@@ -269,7 +270,10 @@ public:
 
     /** Imports a fragment using the passed fragment handler, which contains
         the full path to the fragment stream. */
-    bool                importOoxFragment( const ::rtl::Reference< ::oox::core::FragmentHandler >& rxHandler );
+    bool importOoxFragment( const rtl::Reference<oox::core::FragmentHandler>& rxHandler );
+
+    bool importOoxFragment( const rtl::Reference<oox::core::FragmentHandler>& rxHandler, oox::core::FastParser& rParser );
+
 
     // BIFF2-BIFF8 specific (MUST NOT be called in OOXML/BIFF12 filter) -------
 
diff --git a/sc/source/filter/oox/workbookfragment.cxx b/sc/source/filter/oox/workbookfragment.cxx
index 485642c..e9884da 100644
--- a/sc/source/filter/oox/workbookfragment.cxx
+++ b/sc/source/filter/oox/workbookfragment.cxx
@@ -48,6 +48,8 @@
 #include "calcconfig.hxx"
 
 #include <oox/core/fastparser.hxx>
+#include <comphelper/processfactory.hxx>
+#include <officecfg/Office/Calc.hxx>
 #include <salhelper/thread.hxx>
 #include <osl/conditn.hxx>
 
@@ -197,6 +199,193 @@ const RecordInfo* WorkbookFragment::getRecordInfos() const
     return spRecInfos;
 }
 
+namespace {
+
+class WorkerThread;
+
+typedef std::pair<WorksheetGlobalsRef, FragmentHandlerRef> SheetFragmentHandler;
+typedef std::vector<SheetFragmentHandler> SheetFragmentVector;
+typedef rtl::Reference<WorkerThread> WorkerThreadRef;
+
+struct WorkerThreadData
+{
+    osl::Mutex maMtx;
+    std::vector<WorkerThreadRef> maThreads;
+};
+
+struct IdleWorkerThreadData
+{
+    osl::Mutex maMtx;
+    osl::Condition maCondAdded;
+    std::queue<WorkerThread*> maThreads;
+};
+
+struct
+{
+    boost::scoped_ptr<WorkerThreadData> mpWorkerThreads;
+    boost::scoped_ptr<IdleWorkerThreadData> mpIdleThreads;
+
+} aThreadGlobals;
+
+enum WorkerAction
+{
+    None = 0,
+    TerminateThread,
+    Work
+};
+
+class WorkerThread : public salhelper::Thread
+{
+    WorkbookFragment& mrWorkbookHandler;
+    size_t mnID;
+    FragmentHandlerRef mxHandler;
+    boost::scoped_ptr<oox::core::FastParser> mxParser;
+    osl::Mutex maMtxAction;
+    osl::Condition maCondActionChanged;
+    WorkerAction meAction;
+public:
+    WorkerThread( WorkbookFragment& rWorkbookHandler, size_t nID ) :
+        salhelper::Thread("sheet-import-worker-thread"),
+        mrWorkbookHandler(rWorkbookHandler),
+        mnID(nID),
+        mxParser(rWorkbookHandler.getOoxFilter().createParser()),
+        meAction(None) {}
+
+    virtual void execute()
+    {
+        announceIdle();
+
+        // Keep looping until the terminate request is set.
+        for (maCondActionChanged.wait(); true; maCondActionChanged.wait())
+        {
+            osl::MutexGuard aGuard(maMtxAction);
+            if (!maCondActionChanged.check())
+                // Wait again.
+                continue;
+
+            maCondActionChanged.reset();
+
+            if (meAction == TerminateThread)
+                // End the thread.
+                return;
+
+            if (meAction != Work)
+                continue;
+
+#if 0
+            // TODO : This still deadlocks in the fast parser code.
+            mrWorkbookHandler.importOoxFragment(mxHandler, *mxParser);
+#else
+            double val = rand() / static_cast<double>(RAND_MAX);
+            val *= 1000000; // normalize to 1 second.
+            val *= 1.5; // inflate it a bit.
+            usleep(val); // pretend to be working while asleep.
+#endif
+            announceIdle();
+        }
+    }
+
+    void announceIdle()
+    {
+        // Set itself idle to receive a new task from the main thread.
+        osl::MutexGuard aGuard(aThreadGlobals.mpIdleThreads->maMtx);
+        aThreadGlobals.mpIdleThreads->maThreads.push(this);
+        aThreadGlobals.mpIdleThreads->maCondAdded.set();
+    }
+
+    void terminate()
+    {
+        osl::MutexGuard aGuard(maMtxAction);
+        meAction = TerminateThread;
+        maCondActionChanged.set();
+    }
+
+    void assign( const FragmentHandlerRef& rHandler )
+    {
+        osl::MutexGuard aGuard(maMtxAction);
+        mxHandler = rHandler;
+        meAction = Work;
+        maCondActionChanged.set();
+    }
+};
+
+void importSheetFragments( WorkbookFragment& rWorkbookHandler, SheetFragmentVector& rSheets )
+{
+#if 0 // threaded version
+    size_t nThreadCount = 3;
+    if (nThreadCount > rSheets.size())
+        nThreadCount = rSheets.size();
+
+    // Create new thread globals.
+    aThreadGlobals.mpWorkerThreads.reset(new WorkerThreadData);
+    aThreadGlobals.mpIdleThreads.reset(new IdleWorkerThreadData);
+
+    SheetFragmentVector::iterator it = rSheets.begin(), itEnd = rSheets.end();
+
+    {
+        // Initialize worker threads.
+        osl::MutexGuard aGuard(aThreadGlobals.mpWorkerThreads->maMtx);
+        for (size_t i = 0; i < nThreadCount; ++i)
+        {
+            WorkerThreadRef pThread(new WorkerThread(rWorkbookHandler, i));
+            aThreadGlobals.mpWorkerThreads->maThreads.push_back(pThread);
+            pThread->launch();
+        }
+    }
+
+    for (aThreadGlobals.mpIdleThreads->maCondAdded.wait(); true; aThreadGlobals.mpIdleThreads->maCondAdded.wait())
+    {
+        osl::MutexGuard aGuard(aThreadGlobals.mpIdleThreads->maMtx);
+        if (!aThreadGlobals.mpIdleThreads->maCondAdded.check())
+            // Wait again.
+            continue;
+
+        aThreadGlobals.mpIdleThreads->maCondAdded.reset();
+
+        // Assign work to all idle threads.
+        while (!aThreadGlobals.mpIdleThreads->maThreads.empty())
+        {
+            if (it == itEnd)
+                break;
+
+            WorkerThread* p = aThreadGlobals.mpIdleThreads->maThreads.front();
+            aThreadGlobals.mpIdleThreads->maThreads.pop();
+            p->assign(it->second);
+            ++it;
+        }
+
+        if (it == itEnd)
+            // Finished!  Exit the loop.
+            break;
+    }
+
+    {
+        // Terminate all worker threads.
+        osl::MutexGuard aGuard(aThreadGlobals.mpWorkerThreads->maMtx);
+        for (size_t i = 0, n = aThreadGlobals.mpWorkerThreads->maThreads.size(); i < n; ++i)
+        {
+            WorkerThreadRef pWorker = aThreadGlobals.mpWorkerThreads->maThreads[i];
+            pWorker->terminate();
+            if (pWorker.is())
+                pWorker->join();
+        }
+    }
+
+    // Delete all thread globals.
+    aThreadGlobals.mpWorkerThreads.reset();
+    aThreadGlobals.mpIdleThreads.reset();
+
+#else // non-threaded version
+    for( SheetFragmentVector::iterator it = rSheets.begin(), itEnd = rSheets.end(); it != itEnd; ++it)
+    {
+        // import the sheet fragment
+        rWorkbookHandler.importOoxFragment(it->second);
+    }
+#endif
+}
+
+}
+
 void WorkbookFragment::finalizeImport()
 {
     ISegmentProgressBarRef xGlobalSegment = getProgressBar().createSegment( PROGRESS_LENGTH_GLOBALS );
@@ -318,11 +507,7 @@ void WorkbookFragment::finalizeImport()
     }
 
     // load all worksheets
-    for( SheetFragmentVector::iterator aIt = aSheetFragments.begin(), aEnd = aSheetFragments.end(); aIt != aEnd; ++aIt )
-    {
-        // import the sheet fragment
-        importOoxFragment( aIt->second );
-    }
+    importSheetFragments(*this, aSheetFragments);
 
     for( std::vector<WorksheetHelper*>::iterator aIt = maHelpers.begin(), aEnd = maHelpers.end(); aIt != aEnd; ++aIt )
     {
diff --git a/sc/source/filter/oox/workbookhelper.cxx b/sc/source/filter/oox/workbookhelper.cxx
index 0fca151..32c8bd6 100644
--- a/sc/source/filter/oox/workbookhelper.cxx
+++ b/sc/source/filter/oox/workbookhelper.cxx
@@ -1012,11 +1012,16 @@ XmlFilterBase& WorkbookHelper::getOoxFilter() const
     return mrBookGlob.getOoxFilter();
 }
 
-bool WorkbookHelper::importOoxFragment( const ::rtl::Reference< FragmentHandler >& rxHandler )
+bool WorkbookHelper::importOoxFragment( const rtl::Reference<FragmentHandler>& rxHandler )
 {
     return getOoxFilter().importFragment( rxHandler );
 }
 
+bool WorkbookHelper::importOoxFragment( const rtl::Reference<FragmentHandler>& rxHandler, oox::core::FastParser& rParser )
+{
+    return getOoxFilter().importFragment(rxHandler, rParser);
+}
+
 // BIFF specific --------------------------------------------------------------
 
 BiffType WorkbookHelper::getBiff() const


More information about the Libreoffice-commits mailing list