[Libreoffice-commits] core.git: Branch 'feature/saxparser' - 3 commits - sax/source

Matúš Kukan matus.kukan at gmail.com
Mon Oct 14 06:11:05 PDT 2013


 sax/source/fastparser/fastparser.cxx |   74 +++++++++++------------------------
 sax/source/fastparser/fastparser.hxx |   28 +++++--------
 2 files changed, 35 insertions(+), 67 deletions(-)

New commits:
commit b19ca9b8da5e269b25f86e0510eecb91d726489f
Author: Matúš Kukan <matus.kukan at gmail.com>
Date:   Mon Oct 14 13:22:15 2013 +0200

    fix FIXME: use static const.. in class
    
    Should be done in 0bb5dfba049b085619e3b6c14fefb8ef3e69133a
    
    Change-Id: Ic68a2edc3a74dcf0b49f786667eb1ea1ac9445d2

diff --git a/sax/source/fastparser/fastparser.cxx b/sax/source/fastparser/fastparser.cxx
index 9918a8a..8941b0e 100644
--- a/sax/source/fastparser/fastparser.cxx
+++ b/sax/source/fastparser/fastparser.cxx
@@ -773,21 +773,18 @@ void FastSaxParser::deleteUsedEvents()
     }
 }
 
-// FIXME: make this a static const size_t in the class ...
-#define SIZE 1000
-
 void FastSaxParser::produce(const Event& aEvent)
 {
     Entity& rEntity = getEntity();
     if (!rEntity.mpProducedEvents)
     {
         rEntity.mpProducedEvents = new EventList();
-        rEntity.mpProducedEvents->reserve(SIZE);
+        rEntity.mpProducedEvents->reserve(rEntity.mnEventListSize);
     }
     rEntity.mpProducedEvents->push_back( aEvent );
     if (aEvent.maType == CallbackType::DONE ||
         aEvent.maType == CallbackType::EXCEPTION ||
-        rEntity.mpProducedEvents->size() == SIZE)
+        rEntity.mpProducedEvents->size() == rEntity.mnEventListSize)
     {
         osl::ResettableMutexGuard aGuard(rEntity.maEventProtector);
 
diff --git a/sax/source/fastparser/fastparser.hxx b/sax/source/fastparser/fastparser.hxx
index 6f04500..184f4ef 100644
--- a/sax/source/fastparser/fastparser.hxx
+++ b/sax/source/fastparser/fastparser.hxx
@@ -108,6 +108,8 @@ struct Entity : public ParserData
 
     static const size_t mnEventLowWater = 4;
     static const size_t mnEventHighWater = 8;
+    // Amount of work producer sends to consumer in one iteration:
+    static const size_t mnEventListSize = 1000;
     osl::Condition maConsumeResume;
     osl::Condition maProduceResume;
 
commit a56398bcb3d3b4605b175615593e986e5d963d18
Author: Matúš Kukan <matus.kukan at gmail.com>
Date:   Mon Oct 14 13:14:53 2013 +0200

    FastSaxParser: free memory in ParserThread as much as possible
    
    To avoid leaks, last few (one?) EventLists are freed in main thread
    after parsing is finished.
    
    Change-Id: I9b82107584e1f6b2fe1d4d035b7771ac604a86df

diff --git a/sax/source/fastparser/fastparser.cxx b/sax/source/fastparser/fastparser.cxx
index 13451db..9918a8a 100644
--- a/sax/source/fastparser/fastparser.cxx
+++ b/sax/source/fastparser/fastparser.cxx
@@ -588,6 +588,7 @@ void FastSaxParser::parseStream( const InputSource& maStructSource) throw (SAXEx
             }
         } while (!done);
         xParser->join();
+        deleteUsedEvents();
 
         // finish document
         if( entity.mxDocumentHandler.is() )
@@ -772,12 +773,8 @@ void FastSaxParser::deleteUsedEvents()
     }
 }
 
-// freeing in producer thread will leak one EventList
-
 // FIXME: make this a static const size_t in the class ...
 #define SIZE 1000
-// FIXME: kill this - it should be in the parse thread I'm convinced :-)
-#define FREE_IN_MAIN_THREAD 0
 
 void FastSaxParser::produce(const Event& aEvent)
 {
@@ -809,9 +806,7 @@ void FastSaxParser::produce(const Event& aEvent)
 
         rEntity.maConsumeResume.set();
 
-#if !FREE_IN_MAIN_THREAD
         deleteUsedEvents();
-#endif
     }
 }
 
@@ -862,9 +857,6 @@ bool FastSaxParser::consume(EventList *pEventList)
         }
     }
     rEntity.maUsedEvents.push(pEventList);
-#if FREE_IN_MAIN_THREAD
-    deleteUsedEvents();
-#endif
     return !bIsParserFinished;
 }
 
@@ -916,9 +908,6 @@ void FastSaxParser::parse()
     }
     while( nRead > 0 );
     produce(Event( CallbackType::DONE ));
-#if !FREE_IN_MAIN_THREAD
-    deleteUsedEvents();
-#endif
 }
 
 //------------------------------------------
commit 5610ad37c25c8cbc5923171efe271a0ec8ddc589
Author: Matúš Kukan <matus.kukan at gmail.com>
Date:   Mon Oct 14 12:51:34 2013 +0200

    FastSaxParser: change EventList to be vector of Events, not pointers
    
    So that EventList::reserve makes more sense.
    Also use boost::optional in Event instead of pointers.
    It's much faster this way - not sure which part helped more.
    
    Change-Id: I6560b7c7f50d62b7ec83f432bed2550a5a546db6

diff --git a/sax/source/fastparser/fastparser.cxx b/sax/source/fastparser/fastparser.cxx
index de16108..13451db 100644
--- a/sax/source/fastparser/fastparser.cxx
+++ b/sax/source/fastparser/fastparser.cxx
@@ -80,7 +80,7 @@ private:
         }
         catch (const SAXParseException& e)
         {
-            mpParser->produce(new Event( CallbackType::EXCEPTION ));
+            mpParser->produce(Event( CallbackType::EXCEPTION ));
         }
     }
 };
@@ -191,34 +191,25 @@ OUString SAL_CALL FastLocatorImpl::getSystemId(void) throw (RuntimeException)
 
 // --------------------------------------------------------------------
 
-Event::argCharacters::argCharacters(const OUString& sChars): msChars(sChars)
-{}
-
-Event::argStart::argStart(sal_Int32 nElementToken, const OUString& aNamespace,
-        const OUString& aElementName, FastAttributeList *pAttributes):
-    mnElementToken(nElementToken), maNamespace(aNamespace),
-    maElementName(aElementName), mpAttributes(pAttributes)
-{}
-
-Event::Event(const CallbackType& t): maType(t), mpArgCharacters (0), mpArgStart(0)
+Event::Event(const CallbackType& t): maType(t)
 {}
 
 Event::Event(const CallbackType& t, const OUString& sChars): Event(t)
 {
-    mpArgCharacters = new argCharacters(sChars);
+    msChars = sChars;
 }
 
 Event::Event(const CallbackType& t, sal_Int32 nElementToken, const OUString& aNamespace,
         const OUString& aElementName, FastAttributeList *pAttributes): Event(t)
 {
-    mpArgStart = new argStart(nElementToken, aNamespace, aElementName, pAttributes);
+    mnElementToken = nElementToken;
+    maNamespace = aNamespace;
+    maElementName = aElementName;
+    mpAttributes = pAttributes;
 }
 
 Event::~Event()
-{
-    delete mpArgCharacters;
-    delete mpArgStart;
-}
+{}
 
 // --------------------------------------------------------------------
 
@@ -575,7 +566,6 @@ void FastSaxParser::parseStream( const InputSource& maStructSource) throw (SAXEx
         rtl::Reference<ParserThread> xParser;
         xParser = new ParserThread(this);
         xParser->launch();
-        EventList *pEventList = 0;
         bool done = false;
         do {
             rEntity.maConsumeResume.wait();
@@ -587,7 +577,7 @@ void FastSaxParser::parseStream( const InputSource& maStructSource) throw (SAXEx
                 if (rEntity.maPendingEvents.size() <= rEntity.mnEventLowWater)
                     rEntity.maProduceResume.set(); // start producer again
 
-                pEventList = rEntity.maPendingEvents.front();
+                EventList *pEventList = rEntity.maPendingEvents.front();
                 rEntity.maPendingEvents.pop();
                 aGuard.clear(); // unlock
 
@@ -776,8 +766,6 @@ void FastSaxParser::deleteUsedEvents()
         rEntity.maUsedEvents.pop();
         aGuard.clear(); // unlock
 
-        for (size_t i = 0; i < pEventList->size(); ++i)
-            delete (*pEventList)[i];
         delete pEventList;
 
         aGuard.reset(); // lock
@@ -791,7 +779,7 @@ void FastSaxParser::deleteUsedEvents()
 // FIXME: kill this - it should be in the parse thread I'm convinced :-)
 #define FREE_IN_MAIN_THREAD 0
 
-void FastSaxParser::produce(Event *pEvent)
+void FastSaxParser::produce(const Event& aEvent)
 {
     Entity& rEntity = getEntity();
     if (!rEntity.mpProducedEvents)
@@ -799,9 +787,9 @@ void FastSaxParser::produce(Event *pEvent)
         rEntity.mpProducedEvents = new EventList();
         rEntity.mpProducedEvents->reserve(SIZE);
     }
-    rEntity.mpProducedEvents->push_back( pEvent );
-    if (pEvent->maType == CallbackType::DONE ||
-        pEvent->maType == CallbackType::EXCEPTION ||
+    rEntity.mpProducedEvents->push_back( aEvent );
+    if (aEvent.maType == CallbackType::DONE ||
+        aEvent.maType == CallbackType::EXCEPTION ||
         rEntity.mpProducedEvents->size() == SIZE)
     {
         osl::ResettableMutexGuard aGuard(rEntity.maEventProtector);
@@ -834,17 +822,17 @@ bool FastSaxParser::consume(EventList *pEventList)
     for (EventList::const_iterator aEventIt = pEventList->begin();
             aEventIt != pEventList->end(); ++aEventIt)
     {
-        switch ((*aEventIt)->maType)
+        switch ((*aEventIt).maType)
         {
             case CallbackType::START_ELEMENT:
-                rEntity.startElement( (*aEventIt)->mpArgStart->mnElementToken, (*aEventIt)->mpArgStart->maNamespace,
-                        (*aEventIt)->mpArgStart->maElementName, (*aEventIt)->mpArgStart->mpAttributes );
+                rEntity.startElement( (*aEventIt).mnElementToken.get(), (*aEventIt).maNamespace.get(),
+                        (*aEventIt).maElementName.get(), (*aEventIt).mpAttributes.get() );
                 break;
             case CallbackType::END_ELEMENT:
                 rEntity.endElement();
                 break;
             case CallbackType::CHARACTERS:
-                rEntity.characters( (*aEventIt)->mpArgCharacters->msChars );
+                rEntity.characters( (*aEventIt).msChars.get() );
                 break;
             case CallbackType::DONE:
                 bIsParserFinished = true;
@@ -927,7 +915,7 @@ void FastSaxParser::parse()
         }
     }
     while( nRead > 0 );
-    produce(new Event( CallbackType::DONE ));
+    produce(Event( CallbackType::DONE ));
 #if !FREE_IN_MAIN_THREAD
     deleteUsedEvents();
 #endif
@@ -1042,7 +1030,7 @@ void FastSaxParser::callbackStartElement( const XML_Char* pwName, const XML_Char
             }
 
         rEntity.maNamespaceStack.push( NameWithToken(sNamespace, nNamespaceToken) );
-        produce(new Event( CallbackType::START_ELEMENT, nElementToken, sNamespace,
+        produce(Event( CallbackType::START_ELEMENT, nElementToken, sNamespace,
                     OUString(pName, nNameLen, RTL_TEXTENCODING_UTF8), pAttributes ));
     }
     catch (const Exception& e)
@@ -1062,13 +1050,13 @@ void FastSaxParser::callbackEndElement( SAL_UNUSED_PARAMETER const XML_Char* )
     if( !rEntity.maNamespaceStack.empty() )
         rEntity.maNamespaceStack.pop();
 
-    produce(new Event( CallbackType::END_ELEMENT ));
+    produce(Event( CallbackType::END_ELEMENT ));
 }
 
 
 void FastSaxParser::callbackCharacters( const XML_Char* s, int nLen )
 {
-    produce(new Event( CallbackType::CHARACTERS, OUString(s, nLen, RTL_TEXTENCODING_UTF8) ));
+    produce(Event( CallbackType::CHARACTERS, OUString(s, nLen, RTL_TEXTENCODING_UTF8) ));
 }
 
 void FastSaxParser::callbackEntityDecl(
diff --git a/sax/source/fastparser/fastparser.hxx b/sax/source/fastparser/fastparser.hxx
index 1fb8c7a..6f04500 100644
--- a/sax/source/fastparser/fastparser.hxx
+++ b/sax/source/fastparser/fastparser.hxx
@@ -23,8 +23,9 @@
 #include <queue>
 #include <vector>
 #include <stack>
-#include <boost/unordered_map.hpp>
+#include <boost/optional.hpp>
 #include <boost/shared_ptr.hpp>
+#include <boost/unordered_map.hpp>
 #include <osl/conditn.hxx>
 #include <rtl/ref.hxx>
 #include <com/sun/star/xml/sax/XFastParser.hpp>
@@ -53,7 +54,7 @@ typedef ::boost::shared_ptr< NamespaceDefine > NamespaceDefineRef;
 typedef ::boost::unordered_map< OUString, sal_Int32,
         OUStringHash, ::std::equal_to< OUString > > NamespaceMap;
 
-typedef std::vector<Event *> EventList;
+typedef std::vector<Event> EventList;
 
 enum CallbackType { START_ELEMENT, END_ELEMENT, CHARACTERS, DONE, EXCEPTION };
 
@@ -66,21 +67,12 @@ struct NameWithToken
 };
 
 struct Event {
-    struct argCharacters {
-        OUString msChars;
-        argCharacters(const OUString& sChars);
-    };
-    struct argStart {
-        sal_Int32 mnElementToken;
-        OUString maNamespace;
-        OUString maElementName;
-        FastAttributeList *mpAttributes;
-        argStart(sal_Int32 nElementToken, const OUString& aNamespace,
-                const OUString& aElementName, FastAttributeList *pAttributes);
-    };
+    boost::optional< OUString > msChars;
+    boost::optional< sal_Int32 > mnElementToken;
+    boost::optional< OUString > maNamespace;
+    boost::optional< OUString > maElementName;
+    boost::optional< FastAttributeList * > mpAttributes;
     CallbackType maType;
-    argCharacters *mpArgCharacters;
-    argStart *mpArgStart;
     Event(const CallbackType& t);
     Event(const CallbackType& t, const OUString& sChars);
     Event(const CallbackType& t, sal_Int32 nElementToken, const OUString& aNamespace,
@@ -185,7 +177,7 @@ public:
     inline void popEntity()                         { maEntities.pop(); }
     Entity& getEntity()                             { return maEntities.top(); }
     void parse();
-    void produce(Event *);
+    void produce( const Event& );
 
 private:
     bool consume(EventList *);


More information about the Libreoffice-commits mailing list