[Libreoffice-commits] core.git: Branch 'feature/saxparser' - 448 commits - avmedia/source basctl/source basegfx/source basic/qa basic/source bin/find-unusedheaders.pl bridges/test canvas/source chart2/inc chart2/source comphelper/source config_host/config_features.h.in config_host.mk.in configmgr/source configure.ac connectivity/inc connectivity/source connectivity/workben cppuhelper/source cui/inc cui/source dbaccess/source desktop/Extension_test-passive.mk desktop/source distro-configs/LibreOfficeAndroid.conf distro-configs/LibreOfficeAndroidX86.conf distro-configs/LibreOfficeLinux.conf distro-configs/LibreOfficeMacOSX.conf distro-configs/LibreOfficeOpenBSD.conf distro-configs/LibreOfficeWin32.conf distro-configs/OxygenOfficeLinux.conf distro-configs/OxygenOfficeWin32.conf distro-configs/README download.lst drawinglayer/inc dtrans/source editeng/source extensions/source external/CustomTarget_jawt.mk external/glibc external/libpng external/mingw-externals external/Module_external.mk external/msc-externals external/Package_dbghelp.mk external/Package_jawt.mk external/Package_mingw_dlls.mk external/Package_mingw_gccdlls.mk external/Package_msms.mk external/Package_msvc80_dlls.mk external/Package_msvc_dlls.mk external/StaticLibrary_gnu_readdir_r.mk external/UnpackedTarball_glibc.mk extras/source filter/source forms/source formula/source fpicker/test framework/inc framework/source helpcontent2 i18nlangtag/qa i18nlangtag/source icon-themes/galaxy icon-themes/hicontrast icu/icu4c.10318.CVE-2013-2924_changeset_34076.patch icu/UnpackedTarball_icu.mk idl/source include/basegfx include/comphelper include/filter include/formula include/helpcompiler include/oox include/osl include/rtl include/sal include/sax include/sfx2 include/svl include/svtools include/svx include/toolkit include/tools include/touch include/unotools include/vcl include/xmloff instsetoo_native/Module_instsetoo_native.mk ios/experimental ios/.gitignore ios/lo.xcconfig.in ios/MobileLibreOffice ios/shared
jvmfwk/plugins l10ntools/source libcdr/ExternalProject_libcdr.mk liblangtag/ExternalProject_langtag.mk libmspub/ExternalProject_libmspub.mk libmwaw/ExternalProject_libmwaw.mk libodfgen/ExternalProject_libodfgen.mk liborcus/ExternalProject_liborcus.mk libpng/configs libpng/Makefile libpng/Module_libpng.mk libpng/README libpng/StaticLibrary_png.mk libpng/UnpackedTarball_png.mk libvisio/ExternalProject_libvisio.mk libwpd/ExternalProject_libwpd.mk libwpg/ExternalProject_libwpg.mk libwps/ExternalProject_libwps.mk libxmlsec/ExternalProject_xmlsec.mk lingucomponent/Library_guesslang.mk lingucomponent/source lotuswordpro/source Makefile.fetch Makefile.in moz/ExternalPackage_moz_lib.mk mysqlc/source neon/Library_neon.mk odk/examples officecfg/Configuration_officecfg.mk officecfg/files.mk officecfg/registry oox/inc oox/Library_oox.mk oox/source package/source postgresql/ExternalProject_postgresql.mk postprocess/CustomTarget_registry.mk postprocess/Rdb_services.mk python3/ExternalPackage_python3.mk python3/ExternalProject_python3.mk pyuno/source readlicense_oo/docs README.cross reportdesign/source RepositoryExternal.mk Repository.mk RepositoryModule_host.mk rsc/source sal/android sal/osl sal/qa sal/rtl sax/CppunitTest_sax_attributes.mk sax/CppunitTest_sax_parser.mk sax/Library_fastsax.mk sax/Module_sax.mk sax/qa sax/source sc/inc scp2/InstallModule_ooo.mk scp2/InstallModule_sdkoo.mk scp2/source sc/qa sc/source sd/AllLangResTarget_sd.mk sdext/Configuration_minimizer.mk sdext/Extension_minimizer.mk sdext/Library_minimizer.mk sdext/Library_PresentationMinimizer.mk sdext/Module_sdext.mk sdext/Rdb_minimizer.mk sdext/source sd/inc sd/qa sd/sdi sd/source sd/uiconfig setup_native/source sfx2/inc sfx2/sdi sfx2/source sfx2/uiconfig shell/source slideshow/inc smoketest/Extension_TestExtension.mk solenv/bin solenv/doc solenv/gbuild sot/source stoc/source svgio/inc svgio/source svl/qa svl/source svtools/inc svtools/source svx/inc svx/source sw/CppunitTest_sw_odfimport.mk
sw/CppunitTest_sw_ooxmlexport.mk sw/CppunitTest_sw_ooxmlimport.mk sw/CppunitTest_sw_uiwriter.mk sw/inc sw/JunitTest_sw_complex.mk sw/Module_sw.mk sw/qa sw/sdi sw/source sw/uiconfig sw/UIConfig_swriter.mk toolkit/Library_tk.mk tools/source ucb/source unoidl/source unotools/source unoxml/source unusedcode.easy vcl/android vcl/aqua vcl/coretext vcl/inc vcl/ios vcl/qa vcl/source vcl/unx vcl/win wizards/Module_wizards.mk wizards/Package_access2base.mk wizards/source writerfilter/inc writerfilter/source writerperfect/qa xmlhelp/source xmloff/inc xmloff/Library_xo.mk xmloff/source xmlsecurity/inc
Matúš Kukan
matus.kukan at gmail.com
Thu Oct 17 01:29:19 PDT 2013
Rebased ref, commits from common ancestor:
commit c243a428a2c68e91dc344dacf9291ff361a58f15
Author: Matúš Kukan <matus.kukan at gmail.com>
Date: Wed Oct 16 19:58:36 2013 +0200
fastparser: don't use multithreading for small documents
Determined by XInputStream::available().
Change-Id: I450f4796d9c072b395393582bfc3e1e7768e243b
diff --git a/sax/source/fastparser/fastparser.cxx b/sax/source/fastparser/fastparser.cxx
index fead88d..d7635d9 100644
--- a/sax/source/fastparser/fastparser.cxx
+++ b/sax/source/fastparser/fastparser.cxx
@@ -204,6 +204,7 @@ Entity::Entity( const ParserData& rData ) :
Entity::Entity( const Entity& e ) :
ParserData( e )
+ ,mbEnableThreads(e.mbEnableThreads)
,maStructSource(e.maStructSource)
,mpParser(e.mpParser)
,maConverter(e.maConverter)
@@ -330,6 +331,9 @@ EventList* Entity::getEventList()
Event& Entity::getEvent( CallbackType aType )
{
+ if (!mbEnableThreads)
+ return maSharedEvent;
+
EventList* pEventList = getEventList();
Event& rEvent = (*pEventList)[mnProducedEventsSize++];
rEvent.maType = aType;
@@ -570,33 +574,42 @@ void FastSaxParser::parseStream( const InputSource& maStructSource) throw (SAXEx
entity.mxDocumentHandler->startDocument();
}
- rtl::Reference<ParserThread> xParser;
- xParser = new ParserThread(this);
- xParser->launch();
- bool done = false;
- do {
- rEntity.maConsumeResume.wait();
- rEntity.maConsumeResume.reset();
+ rEntity.mbEnableThreads = (rEntity.maStructSource.aInputStream->available() > 10000);
- osl::ResettableMutexGuard aGuard(rEntity.maEventProtector);
- while (!rEntity.maPendingEvents.empty())
- {
- if (rEntity.maPendingEvents.size() <= rEntity.mnEventLowWater)
- rEntity.maProduceResume.set(); // start producer again
+ if (rEntity.mbEnableThreads)
+ {
+ rtl::Reference<ParserThread> xParser;
+ xParser = new ParserThread(this);
+ xParser->launch();
+ bool done = false;
+ do {
+ rEntity.maConsumeResume.wait();
+ rEntity.maConsumeResume.reset();
+
+ osl::ResettableMutexGuard aGuard(rEntity.maEventProtector);
+ while (!rEntity.maPendingEvents.empty())
+ {
+ if (rEntity.maPendingEvents.size() <= rEntity.mnEventLowWater)
+ rEntity.maProduceResume.set(); // start producer again
- EventList *pEventList = rEntity.maPendingEvents.front();
- rEntity.maPendingEvents.pop();
- aGuard.clear(); // unlock
+ EventList *pEventList = rEntity.maPendingEvents.front();
+ rEntity.maPendingEvents.pop();
+ aGuard.clear(); // unlock
- if (!consume(pEventList))
- done = true;
+ if (!consume(pEventList))
+ done = true;
- aGuard.reset(); // lock
- rEntity.maUsedEvents.push(pEventList);
- }
- } while (!done);
- xParser->join();
- deleteUsedEvents();
+ aGuard.reset(); // lock
+ rEntity.maUsedEvents.push(pEventList);
+ }
+ } while (!done);
+ xParser->join();
+ deleteUsedEvents();
+ }
+ else
+ {
+ parse();
+ }
// finish document
if( entity.mxDocumentHandler.is() )
@@ -906,7 +919,8 @@ void FastSaxParser::parse()
}
while( nRead > 0 );
rEntity.getEvent( CallbackType::DONE );
- produce( CallbackType::DONE );
+ if (rEntity.mbEnableThreads)
+ produce( CallbackType::DONE );
}
//------------------------------------------
@@ -1023,7 +1037,10 @@ void FastSaxParser::callbackStartElement( const XML_Char* pwName, const XML_Char
rEntity.maNamespaceStack.push( NameWithToken(rEvent.msNamespace, nNamespaceToken) );
rEvent.msElementName = OUString(pName, nNameLen, RTL_TEXTENCODING_UTF8);
- produce( CallbackType::START_ELEMENT );
+ if (rEntity.mbEnableThreads)
+ produce( CallbackType::START_ELEMENT );
+ else
+ rEntity.startElement( &rEvent );
}
catch (const Exception& e)
{
@@ -1043,15 +1060,22 @@ void FastSaxParser::callbackEndElement( SAL_UNUSED_PARAMETER const XML_Char* )
rEntity.maNamespaceStack.pop();
rEntity.getEvent( CallbackType::END_ELEMENT );
- produce( CallbackType::END_ELEMENT );
+ if (rEntity.mbEnableThreads)
+ produce( CallbackType::END_ELEMENT );
+ else
+ rEntity.endElement();
}
void FastSaxParser::callbackCharacters( const XML_Char* s, int nLen )
{
- Event& rEvent = getEntity().getEvent( CallbackType::CHARACTERS );
+ Entity& rEntity = getEntity();
+ Event& rEvent = rEntity.getEvent( CallbackType::CHARACTERS );
rEvent.msChars = OUString(s, nLen, RTL_TEXTENCODING_UTF8);
- produce( CallbackType::CHARACTERS );
+ if (rEntity.mbEnableThreads)
+ produce( CallbackType::CHARACTERS );
+ else
+ rEntity.characters( rEvent.msChars );
}
void FastSaxParser::callbackEntityDecl(
diff --git a/sax/source/fastparser/fastparser.hxx b/sax/source/fastparser/fastparser.hxx
index cf6b911..443c359 100644
--- a/sax/source/fastparser/fastparser.hxx
+++ b/sax/source/fastparser/fastparser.hxx
@@ -107,6 +107,7 @@ struct Entity : public ParserData
{
// Amount of work producer sends to consumer in one iteration:
static const size_t mnEventListSize = 1000;
+
// unique for each Entity instance:
// Number of valid events in mpProducedEvents:
@@ -120,9 +121,13 @@ struct Entity : public ParserData
static const size_t mnEventHighWater = 8;
osl::Condition maConsumeResume;
osl::Condition maProduceResume;
+ // Event we use to store data if threading is disabled:
+ Event maSharedEvent;
// copied in copy constructor:
+ // Allow to disable threading for small documents:
+ bool mbEnableThreads;
::com::sun::star::xml::sax::InputSource maStructSource;
XML_Parser mpParser;
::sax_expatwrap::XMLFile2UTFConverter maConverter;
commit d0d93d8eca19ea8ba1846e13f9fec226defad717
Author: Matúš Kukan <matus.kukan at gmail.com>
Date: Tue Oct 15 14:43:24 2013 +0200
fastparser: don't create temporary Events; use references to event list
Change-Id: I1e12fbeeb90d6020d0566d05fc0318082e1da5fc
diff --git a/sax/source/fastparser/fastparser.cxx b/sax/source/fastparser/fastparser.cxx
index 8748ee4..fead88d 100644
--- a/sax/source/fastparser/fastparser.cxx
+++ b/sax/source/fastparser/fastparser.cxx
@@ -76,7 +76,8 @@ private:
}
catch (const SAXParseException& e)
{
- mpParser->produce(Event( CallbackType::EXCEPTION ));
+ mpParser->getEntity().getEvent( CallbackType::EXCEPTION );
+ mpParser->produce( CallbackType::EXCEPTION );
}
}
};
@@ -187,28 +188,6 @@ OUString SAL_CALL FastLocatorImpl::getSystemId(void) throw (RuntimeException)
// --------------------------------------------------------------------
-Event::Event(const CallbackType& t): maType(t)
-{}
-
-Event::Event(const CallbackType& t, const OUString& sChars): Event(t)
-{
- msChars = sChars;
-}
-
-Event::Event(const CallbackType& t, sal_Int32 nElementToken, const OUString& aNamespace,
- const OUString& aElementName, FastAttributeList *pAttributes): Event(t)
-{
- mnElementToken = nElementToken;
- maNamespace = aNamespace;
- maElementName = aElementName;
- mpAttributes = rtl::Reference< FastAttributeList >(pAttributes);
-}
-
-Event::~Event()
-{}
-
-// --------------------------------------------------------------------
-
ParserData::ParserData()
{}
@@ -243,9 +222,9 @@ Entity::~Entity()
void Entity::startElement( Event *pEvent )
{
- const sal_Int32& nElementToken = pEvent->mnElementToken.get();
- const OUString& aNamespace = pEvent->maNamespace.get();
- const OUString& aElementName = pEvent->maElementName.get();
+ const sal_Int32& nElementToken = pEvent->mnElementToken;
+ const OUString& aNamespace = pEvent->msNamespace;
+ const OUString& aElementName = pEvent->msElementName;
Reference< XFastContextHandler > xParentContext;
if( !maContextStack.empty() )
{
@@ -261,7 +240,7 @@ void Entity::startElement( Event *pEvent )
try
{
- Reference< XFastAttributeList > xAttr( pEvent->mpAttributes.get().get() );
+ Reference< XFastAttributeList > xAttr( pEvent->mxAttributes.get() );
Reference< XFastContextHandler > xContext;
if( nElementToken == FastToken::DONTKNOW )
{
@@ -337,17 +316,26 @@ EventList* Entity::getEventList()
mpProducedEvents = maUsedEvents.front();
maUsedEvents.pop();
aGuard.clear(); // unlock
- mpProducedEvents->clear();
+ mnProducedEventsSize = 0;
}
if (!mpProducedEvents)
{
mpProducedEvents = new EventList();
- mpProducedEvents->reserve(mnEventListSize);
+ mpProducedEvents->resize(mnEventListSize);
+ mnProducedEventsSize = 0;
}
}
return mpProducedEvents;
}
+Event& Entity::getEvent( CallbackType aType )
+{
+ EventList* pEventList = getEventList();
+ Event& rEvent = (*pEventList)[mnProducedEventsSize++];
+ rEvent.maType = aType;
+ return rEvent;
+}
+
// --------------------------------------------------------------------
// FastSaxParser implementation
// --------------------------------------------------------------------
@@ -604,6 +592,7 @@ void FastSaxParser::parseStream( const InputSource& maStructSource) throw (SAXEx
done = true;
aGuard.reset(); // lock
+ rEntity.maUsedEvents.push(pEventList);
}
} while (!done);
xParser->join();
@@ -793,14 +782,12 @@ void FastSaxParser::deleteUsedEvents()
}
}
-void FastSaxParser::produce(const Event& aEvent)
+void FastSaxParser::produce( CallbackType aType )
{
Entity& rEntity = getEntity();
- EventList* pEventList = rEntity.getEventList();
- pEventList->push_back( aEvent );
- if (aEvent.maType == CallbackType::DONE ||
- aEvent.maType == CallbackType::EXCEPTION ||
- pEventList->size() == rEntity.mnEventListSize)
+ if (aType == CallbackType::DONE ||
+ aType == CallbackType::EXCEPTION ||
+ rEntity.mnProducedEventsSize == rEntity.mnEventListSize)
{
osl::ResettableMutexGuard aGuard(rEntity.maEventProtector);
@@ -812,7 +799,7 @@ void FastSaxParser::produce(const Event& aEvent)
aGuard.reset(); // lock
}
- rEntity.maPendingEvents.push(pEventList);
+ rEntity.maPendingEvents.push(rEntity.mpProducedEvents);
rEntity.mpProducedEvents = 0;
aGuard.clear(); // unlock
@@ -824,7 +811,6 @@ void FastSaxParser::produce(const Event& aEvent)
bool FastSaxParser::consume(EventList *pEventList)
{
Entity& rEntity = getEntity();
- bool bIsParserFinished = false;
for (EventList::iterator aEventIt = pEventList->begin();
aEventIt != pEventList->end(); ++aEventIt)
{
@@ -837,13 +823,12 @@ bool FastSaxParser::consume(EventList *pEventList)
rEntity.endElement();
break;
case CallbackType::CHARACTERS:
- rEntity.characters( (*aEventIt).msChars.get() );
+ rEntity.characters( (*aEventIt).msChars );
break;
case CallbackType::DONE:
- bIsParserFinished = true;
- assert(aEventIt+1 == pEventList->end());
- break;
+ return false;
case CallbackType::EXCEPTION:
+ {
assert( rEntity.maSavedException.hasValue() );
// Error during parsing !
XML_Error xmlE = XML_GetErrorCode( rEntity.mpParser );
@@ -864,10 +849,13 @@ bool FastSaxParser::consume(EventList *pEventList)
rEntity.mxErrorHandler->fatalError( Any( aExcept ) );
throw aExcept;
+ }
+ default:
+ assert(false);
+ return false;
}
}
- rEntity.maUsedEvents.push(pEventList);
- return !bIsParserFinished;
+ return true;
}
// starts parsing with actual parser !
@@ -917,7 +905,8 @@ void FastSaxParser::parse()
}
}
while( nRead > 0 );
- produce(Event( CallbackType::DONE ));
+ rEntity.getEvent( CallbackType::DONE );
+ produce( CallbackType::DONE );
}
//------------------------------------------
@@ -940,15 +929,20 @@ void FastSaxParser::callbackStartElement( const XML_Char* pwName, const XML_Char
}
// create attribute map and process namespace instructions
+ Event& rEvent = getEntity().getEvent( CallbackType::START_ELEMENT );
+ if (rEvent.mxAttributes.is())
+ rEvent.mxAttributes->clear();
+ else
+ rEvent.mxAttributes.set( new FastAttributeList( rEntity.mxTokenHandler ) );
+
sal_Int32 nNameLen, nPrefixLen;
const XML_Char *pName;
const XML_Char *pPrefix;
- OUString sNamespace;
+
sal_Int32 nNamespaceToken = FastToken::DONTKNOW;
- FastAttributeList *pAttributes = new FastAttributeList( rEntity.mxTokenHandler );
if (!rEntity.maNamespaceStack.empty())
{
- sNamespace = rEntity.maNamespaceStack.top().msName;
+ rEvent.msNamespace = rEntity.maNamespaceStack.top().msName;
nNamespaceToken = rEntity.maNamespaceStack.top().mnToken;
}
@@ -977,8 +971,8 @@ void FastSaxParser::callbackStartElement( const XML_Char* pwName, const XML_Char
if( (nNameLen == 5) && (strcmp( pName, "xmlns" ) == 0) )
{
// default namespace is the attribute value
- sNamespace = OUString( awAttributes[i+1], strlen( awAttributes[i+1] ), RTL_TEXTENCODING_UTF8 );
- nNamespaceToken = GetNamespaceToken( sNamespace );
+ rEvent.msNamespace = OUString( awAttributes[i+1], strlen( awAttributes[i+1] ), RTL_TEXTENCODING_UTF8 );
+ nNamespaceToken = GetNamespaceToken( rEvent.msNamespace );
}
}
}
@@ -993,9 +987,9 @@ void FastSaxParser::callbackStartElement( const XML_Char* pwName, const XML_Char
{
sal_Int32 nAttributeToken = GetTokenWithPrefix( pPrefix, nPrefixLen, pName, nNameLen );
if( nAttributeToken != FastToken::DONTKNOW )
- pAttributes->add( nAttributeToken, awAttributes[i+1] );
+ rEvent.mxAttributes->add( nAttributeToken, awAttributes[i+1] );
else
- pAttributes->addUnknown( GetNamespaceURL( pPrefix, nPrefixLen ),
+ rEvent.mxAttributes->addUnknown( GetNamespaceURL( pPrefix, nPrefixLen ),
OString(pName, nNameLen), awAttributes[i+1] );
}
}
@@ -1005,32 +999,31 @@ void FastSaxParser::callbackStartElement( const XML_Char* pwName, const XML_Char
{
sal_Int32 nAttributeToken = GetToken( pName, nNameLen );
if( nAttributeToken != FastToken::DONTKNOW )
- pAttributes->add( nAttributeToken, awAttributes[i+1] );
+ rEvent.mxAttributes->add( nAttributeToken, awAttributes[i+1] );
else
- pAttributes->addUnknown( OString(pName, nNameLen), awAttributes[i+1] );
+ rEvent.mxAttributes->addUnknown( OString(pName, nNameLen), awAttributes[i+1] );
}
}
}
- sal_Int32 nElementToken;
splitName( pwName, pPrefix, nPrefixLen, pName, nNameLen );
if( nPrefixLen > 0 )
- nElementToken = GetTokenWithPrefix( pPrefix, nPrefixLen, pName, nNameLen );
- else if( !sNamespace.isEmpty() )
- nElementToken = GetTokenWithContextNamespace( nNamespaceToken, pName, nNameLen );
+ rEvent.mnElementToken = GetTokenWithPrefix( pPrefix, nPrefixLen, pName, nNameLen );
+ else if( !rEvent.msNamespace.isEmpty() )
+ rEvent.mnElementToken = GetTokenWithContextNamespace( nNamespaceToken, pName, nNameLen );
else
- nElementToken = GetToken( pName );
+ rEvent.mnElementToken = GetToken( pName );
- if( nElementToken == FastToken::DONTKNOW )
+ if( rEvent.mnElementToken == FastToken::DONTKNOW )
if( nPrefixLen > 0 )
{
- sNamespace = GetNamespaceURL( pPrefix, nPrefixLen );
- nNamespaceToken = GetNamespaceToken( sNamespace );
+ rEvent.msNamespace = GetNamespaceURL( pPrefix, nPrefixLen );
+ nNamespaceToken = GetNamespaceToken( rEvent.msNamespace );
}
- rEntity.maNamespaceStack.push( NameWithToken(sNamespace, nNamespaceToken) );
- produce(Event( CallbackType::START_ELEMENT, nElementToken, sNamespace,
- OUString(pName, nNameLen, RTL_TEXTENCODING_UTF8), pAttributes ));
+ rEntity.maNamespaceStack.push( NameWithToken(rEvent.msNamespace, nNamespaceToken) );
+ rEvent.msElementName = OUString(pName, nNameLen, RTL_TEXTENCODING_UTF8);
+ produce( CallbackType::START_ELEMENT );
}
catch (const Exception& e)
{
@@ -1049,13 +1042,16 @@ void FastSaxParser::callbackEndElement( SAL_UNUSED_PARAMETER const XML_Char* )
if( !rEntity.maNamespaceStack.empty() )
rEntity.maNamespaceStack.pop();
- produce(Event( CallbackType::END_ELEMENT ));
+ rEntity.getEvent( CallbackType::END_ELEMENT );
+ produce( CallbackType::END_ELEMENT );
}
void FastSaxParser::callbackCharacters( const XML_Char* s, int nLen )
{
- produce(Event( CallbackType::CHARACTERS, OUString(s, nLen, RTL_TEXTENCODING_UTF8) ));
+ Event& rEvent = getEntity().getEvent( CallbackType::CHARACTERS );
+ rEvent.msChars = OUString(s, nLen, RTL_TEXTENCODING_UTF8);
+ produce( CallbackType::CHARACTERS );
}
void FastSaxParser::callbackEntityDecl(
diff --git a/sax/source/fastparser/fastparser.hxx b/sax/source/fastparser/fastparser.hxx
index a1cef38..cf6b911 100644
--- a/sax/source/fastparser/fastparser.hxx
+++ b/sax/source/fastparser/fastparser.hxx
@@ -64,20 +64,15 @@ struct NameWithToken
typedef std::vector<Event> EventList;
-enum CallbackType { START_ELEMENT, END_ELEMENT, CHARACTERS, DONE, EXCEPTION };
+enum CallbackType { INVALID, START_ELEMENT, END_ELEMENT, CHARACTERS, DONE, EXCEPTION };
struct Event {
- boost::optional< OUString > msChars;
- boost::optional< sal_Int32 > mnElementToken;
- boost::optional< OUString > maNamespace;
- boost::optional< OUString > maElementName;
- boost::optional< rtl::Reference< FastAttributeList > > mpAttributes;
+ OUString msChars;
+ sal_Int32 mnElementToken;
+ OUString msNamespace;
+ OUString msElementName;
+ rtl::Reference< FastAttributeList > mxAttributes;
CallbackType maType;
- Event(const CallbackType& t);
- Event(const CallbackType& t, const OUString& sChars);
- Event(const CallbackType& t, sal_Int32 nElementToken, const OUString& aNamespace,
- const OUString& aElementName, FastAttributeList *pAttributes);
- ~Event();
};
// --------------------------------------------------------------------
@@ -114,6 +109,8 @@ struct Entity : public ParserData
static const size_t mnEventListSize = 1000;
// unique for each Entity instance:
+ // Number of valid events in mpProducedEvents:
+ size_t mnProducedEventsSize;
EventList *mpProducedEvents;
std::queue< EventList * > maPendingEvents;
std::queue< EventList * > maUsedEvents;
@@ -150,6 +147,7 @@ struct Entity : public ParserData
void characters( const OUString& sChars );
void endElement();
EventList* getEventList();
+ Event& getEvent( CallbackType aType );
};
// --------------------------------------------------------------------
@@ -193,7 +191,7 @@ public:
inline void popEntity() { maEntities.pop(); }
Entity& getEntity() { return maEntities.top(); }
void parse();
- void produce( const Event& );
+ void produce( CallbackType aType );
private:
bool consume(EventList *);
commit e6e6a30fc369c19db405f842bde777c6d330e9cc
Author: Matúš Kukan <matus.kukan at gmail.com>
Date: Tue Oct 15 10:32:55 2013 +0200
fastparser: reuse event lists if possible
Instead of allocating and freeing the memory all the time.
Change-Id: I53800abaca51d42d7d44a98fb271de7df7f90f58
diff --git a/sax/source/fastparser/fastparser.cxx b/sax/source/fastparser/fastparser.cxx
index 2040c32..8748ee4 100644
--- a/sax/source/fastparser/fastparser.cxx
+++ b/sax/source/fastparser/fastparser.cxx
@@ -326,6 +326,28 @@ void Entity::endElement()
}
maContextStack.pop();
}
+
+EventList* Entity::getEventList()
+{
+ if (!mpProducedEvents)
+ {
+ osl::ResettableMutexGuard aGuard(maEventProtector);
+ if (!maUsedEvents.empty())
+ {
+ mpProducedEvents = maUsedEvents.front();
+ maUsedEvents.pop();
+ aGuard.clear(); // unlock
+ mpProducedEvents->clear();
+ }
+ if (!mpProducedEvents)
+ {
+ mpProducedEvents = new EventList();
+ mpProducedEvents->reserve(mnEventListSize);
+ }
+ }
+ return mpProducedEvents;
+}
+
// --------------------------------------------------------------------
// FastSaxParser implementation
// --------------------------------------------------------------------
@@ -774,15 +796,11 @@ void FastSaxParser::deleteUsedEvents()
void FastSaxParser::produce(const Event& aEvent)
{
Entity& rEntity = getEntity();
- if (!rEntity.mpProducedEvents)
- {
- rEntity.mpProducedEvents = new EventList();
- rEntity.mpProducedEvents->reserve(rEntity.mnEventListSize);
- }
- rEntity.mpProducedEvents->push_back( aEvent );
- if (aEvent->maType == CallbackType::DONE ||
- aEvent->maType == CallbackType::EXCEPTION ||
- rEntity.mpProducedEvents->size() == rEntity.mnEventListSize)
+ EventList* pEventList = rEntity.getEventList();
+ pEventList->push_back( aEvent );
+ if (aEvent.maType == CallbackType::DONE ||
+ aEvent.maType == CallbackType::EXCEPTION ||
+ pEventList->size() == rEntity.mnEventListSize)
{
osl::ResettableMutexGuard aGuard(rEntity.maEventProtector);
@@ -794,14 +812,12 @@ void FastSaxParser::produce(const Event& aEvent)
aGuard.reset(); // lock
}
- rEntity.maPendingEvents.push(rEntity.mpProducedEvents);
+ rEntity.maPendingEvents.push(pEventList);
rEntity.mpProducedEvents = 0;
aGuard.clear(); // unlock
rEntity.maConsumeResume.set();
-
- deleteUsedEvents();
}
}
diff --git a/sax/source/fastparser/fastparser.hxx b/sax/source/fastparser/fastparser.hxx
index 90cc043..a1cef38 100644
--- a/sax/source/fastparser/fastparser.hxx
+++ b/sax/source/fastparser/fastparser.hxx
@@ -149,6 +149,7 @@ struct Entity : public ParserData
void startElement( Event *pEvent );
void characters( const OUString& sChars );
void endElement();
+ EventList* getEventList();
};
// --------------------------------------------------------------------
commit 15195c679e86620f9add5878ceb70d3a1328a390
Author: Michael Meeks <michael.meeks at collabora.com>
Date: Fri Oct 11 14:09:52 2013 +0100
fastparser: re-work locking, add high & low watermarks, change sizes etc.
Change-Id: I7fe1435addc6dce5a74a8411f7825cea331a5b3f
diff --git a/sax/source/fastparser/fastparser.cxx b/sax/source/fastparser/fastparser.cxx
index 6f92f59..2040c32 100644
--- a/sax/source/fastparser/fastparser.cxx
+++ b/sax/source/fastparser/fastparser.cxx
@@ -565,15 +565,23 @@ void FastSaxParser::parseStream( const InputSource& maStructSource) throw (SAXEx
xParser->launch();
bool done = false;
do {
- rEntity.maEventsPushed.wait();
- rEntity.maEventsPushed.reset();
- MutexGuard aGuard(rEntity.maEventProtector);
+ rEntity.maConsumeResume.wait();
+ rEntity.maConsumeResume.reset();
+
+ osl::ResettableMutexGuard aGuard(rEntity.maEventProtector);
while (!rEntity.maPendingEvents.empty())
{
+ if (rEntity.maPendingEvents.size() <= rEntity.mnEventLowWater)
+ rEntity.maProduceResume.set(); // start producer again
+
EventList *pEventList = rEntity.maPendingEvents.front();
rEntity.maPendingEvents.pop();
+ aGuard.clear(); // unlock
+
if (!consume(pEventList))
done = true;
+
+ aGuard.reset(); // lock
}
} while (!done);
xParser->join();
@@ -748,12 +756,18 @@ OUString lclGetErrorMessage( XML_Error xmlE, const OUString& sSystemId, sal_Int3
void FastSaxParser::deleteUsedEvents()
{
Entity& rEntity = getEntity();
+ osl::ResettableMutexGuard aGuard(rEntity.maEventProtector);
+
while (!rEntity.maUsedEvents.empty())
{
EventList *pEventList = rEntity.maUsedEvents.front();
rEntity.maUsedEvents.pop();
+ aGuard.clear(); // unlock
+
delete pEventList;
+
+ aGuard.reset(); // lock
}
}
@@ -770,11 +784,24 @@ void FastSaxParser::produce(const Event& aEvent)
aEvent->maType == CallbackType::EXCEPTION ||
rEntity.mpProducedEvents->size() == rEntity.mnEventListSize)
{
- MutexGuard aGuard(rEntity.maEventProtector);
+ osl::ResettableMutexGuard aGuard(rEntity.maEventProtector);
+
+ while (rEntity.maPendingEvents.size() >= rEntity.mnEventHighWater)
+ { // pause parsing for a bit
+ aGuard.clear(); // unlock
+ rEntity.maProduceResume.wait();
+ rEntity.maProduceResume.reset();
+ aGuard.reset(); // lock
+ }
+
rEntity.maPendingEvents.push(rEntity.mpProducedEvents);
rEntity.mpProducedEvents = 0;
+
+ aGuard.clear(); // unlock
+
+ rEntity.maConsumeResume.set();
+
deleteUsedEvents();
- rEntity.maEventsPushed.set();
}
}
diff --git a/sax/source/fastparser/fastparser.hxx b/sax/source/fastparser/fastparser.hxx
index 933ec9e..90cc043 100644
--- a/sax/source/fastparser/fastparser.hxx
+++ b/sax/source/fastparser/fastparser.hxx
@@ -118,7 +118,11 @@ struct Entity : public ParserData
std::queue< EventList * > maPendingEvents;
std::queue< EventList * > maUsedEvents;
osl::Mutex maEventProtector;
- osl::Condition maEventsPushed;
+
+ static const size_t mnEventLowWater = 4;
+ static const size_t mnEventHighWater = 8;
+ osl::Condition maConsumeResume;
+ osl::Condition maProduceResume;
// copied in copy constructor:
commit 592e470f3e9428f78a8e561991aea1790b1a30dc
Author: Matúš Kukan <matus.kukan at gmail.com>
Date: Tue Oct 8 23:28:32 2013 +0200
fastparser: implementation using two threads
Instead of calling methods directly, generate EventList - vector of
Events, where arguments for the callee are stored.
Change-Id: I227a0ef3038566664ac8f294770152c8b445997b
diff --git a/sax/Library_fastsax.mk b/sax/Library_fastsax.mk
index 6fa858e..ab0c4e6 100644
--- a/sax/Library_fastsax.mk
+++ b/sax/Library_fastsax.mk
@@ -28,6 +28,7 @@ $(eval $(call gb_Library_use_libraries,fastsax,\
cppu \
cppuhelper \
sal \
+ salhelper \
sax \
$(gb_UWINAPI) \
))
diff --git a/sax/source/fastparser/fastparser.cxx b/sax/source/fastparser/fastparser.cxx
index 825160f..6f92f59 100644
--- a/sax/source/fastparser/fastparser.cxx
+++ b/sax/source/fastparser/fastparser.cxx
@@ -21,6 +21,7 @@
#include <osl/diagnose.h>
#include <rtl/ustrbuf.hxx>
+#include <salhelper/thread.hxx>
#include <com/sun/star/lang/DisposedException.hpp>
#include <com/sun/star/xml/sax/SAXParseException.hpp>
@@ -61,6 +62,25 @@ struct NamespaceDefine
NamespaceDefine( const OString& rPrefix, sal_Int32 nToken, const OUString& rNamespaceURL ) : maPrefix( rPrefix ), mnToken( nToken ), maNamespaceURL( rNamespaceURL ) {}
};
+class ParserThread: public salhelper::Thread
+{
+ FastSaxParser *mpParser;
+public:
+ ParserThread(FastSaxParser *pParser): Thread("Parser"), mpParser(pParser) {}
+private:
+ virtual void execute()
+ {
+ try
+ {
+ mpParser->parse();
+ }
+ catch (const SAXParseException& e)
+ {
+ mpParser->produce(Event( CallbackType::EXCEPTION ));
+ }
+ }
+};
+
// --------------------------------------------------------------------
// FastLocatorImpl
// --------------------------------------------------------------------
@@ -167,31 +187,65 @@ OUString SAL_CALL FastLocatorImpl::getSystemId(void) throw (RuntimeException)
// --------------------------------------------------------------------
-ParserData::ParserData()
+Event::Event(const CallbackType& t): maType(t)
+{}
+
+Event::Event(const CallbackType& t, const OUString& sChars): Event(t)
{
+ msChars = sChars;
}
-ParserData::~ParserData()
+Event::Event(const CallbackType& t, sal_Int32 nElementToken, const OUString& aNamespace,
+ const OUString& aElementName, FastAttributeList *pAttributes): Event(t)
{
+ mnElementToken = nElementToken;
+ maNamespace = aNamespace;
+ maElementName = aElementName;
+ mpAttributes = rtl::Reference< FastAttributeList >(pAttributes);
}
+Event::~Event()
+{}
+
+// --------------------------------------------------------------------
+
+ParserData::ParserData()
+{}
+
+ParserData::~ParserData()
+{}
+
// --------------------------------------------------------------------
Entity::Entity( const ParserData& rData ) :
ParserData( rData )
{
- // performance-improvement. Reference is needed when calling the startTag callback.
- // Handing out the same object with every call is allowed (see sax-specification)
- mxAttributes.set( new FastAttributeList( mxTokenHandler ) );
+ mpProducedEvents = 0;
+}
+
+Entity::Entity( const Entity& e ) :
+ ParserData( e )
+ ,maStructSource(e.maStructSource)
+ ,mpParser(e.mpParser)
+ ,maConverter(e.maConverter)
+ ,maSavedException(e.maSavedException)
+ ,maNamespaceStack(e.maNamespaceStack)
+ ,maContextStack(e.maContextStack)
+ ,maNamespaceCount(e.maNamespaceCount)
+ ,maNamespaceDefines(e.maNamespaceDefines)
+{
+ mpProducedEvents = 0;
}
Entity::~Entity()
{
}
-void Entity::startElement( sal_Int32 nElementToken, const OUString& aNamespace,
- const OUString& aElementName, FastAttributeList *pAttributes )
+void Entity::startElement( Event *pEvent )
{
+ const sal_Int32& nElementToken = pEvent->mnElementToken.get();
+ const OUString& aNamespace = pEvent->maNamespace.get();
+ const OUString& aElementName = pEvent->maElementName.get();
Reference< XFastContextHandler > xParentContext;
if( !maContextStack.empty() )
{
@@ -207,7 +261,7 @@ void Entity::startElement( sal_Int32 nElementToken, const OUString& aNamespace,
try
{
- Reference< XFastAttributeList > xAttr( pAttributes );
+ Reference< XFastAttributeList > xAttr( pEvent->mpAttributes.get().get() );
Reference< XFastContextHandler > xContext;
if( nElementToken == FastToken::DONTKNOW )
{
@@ -495,6 +549,7 @@ void FastSaxParser::parseStream( const InputSource& maStructSource) throw (SAXEx
XML_SetExternalEntityRefHandler( entity.mpParser, call_callbackExternalEntityRef );
pushEntity( entity );
+ Entity& rEntity = getEntity();
try
{
// start the document
@@ -505,7 +560,24 @@ void FastSaxParser::parseStream( const InputSource& maStructSource) throw (SAXEx
entity.mxDocumentHandler->startDocument();
}
- parse();
+ rtl::Reference<ParserThread> xParser;
+ xParser = new ParserThread(this);
+ xParser->launch();
+ bool done = false;
+ do {
+ rEntity.maEventsPushed.wait();
+ rEntity.maEventsPushed.reset();
+ MutexGuard aGuard(rEntity.maEventProtector);
+ while (!rEntity.maPendingEvents.empty())
+ {
+ EventList *pEventList = rEntity.maPendingEvents.front();
+ rEntity.maPendingEvents.pop();
+ if (!consume(pEventList))
+ done = true;
+ }
+ } while (!done);
+ xParser->join();
+ deleteUsedEvents();
// finish document
if( entity.mxDocumentHandler.is() )
@@ -673,6 +745,88 @@ OUString lclGetErrorMessage( XML_Error xmlE, const OUString& sSystemId, sal_Int3
} // namespace
+/** Frees every event batch the consumer has finished with.
+ *
+ *  consume() parks processed batches on maUsedEvents; this drains that
+ *  queue and deletes each batch in turn.
+ */
+void FastSaxParser::deleteUsedEvents()
+{
+    std::queue< EventList * >& rUsedBatches = getEntity().maUsedEvents;
+    for (; !rUsedBatches.empty(); rUsedBatches.pop())
+        delete rUsedBatches.front();
+}
+
+/** Queues one parser event for the consuming side.
+ *
+ *  The event is appended to the batch currently being filled
+ *  (mpProducedEvents), which is created on first use. The batch is published
+ *  on maPendingEvents, and maEventsPushed is signalled, as soon as it holds
+ *  mnEventListSize events or when aEvent is a DONE or EXCEPTION event.
+ *
+ *  @param aEvent event to queue; a copy is stored in the batch.
+ */
+void FastSaxParser::produce(const Event& aEvent)
+{
+    Entity& rEntity = getEntity();
+    if (!rEntity.mpProducedEvents)
+    {
+        rEntity.mpProducedEvents = new EventList();
+        rEntity.mpProducedEvents->reserve(rEntity.mnEventListSize);
+    }
+    rEntity.mpProducedEvents->push_back( aEvent );
+    if (aEvent.maType == CallbackType::DONE ||
+        aEvent.maType == CallbackType::EXCEPTION ||
+        rEntity.mpProducedEvents->size() == rEntity.mnEventListSize)
+    {
+        // parseStream() drains maPendingEvents under the same mutex
+        MutexGuard aGuard(rEntity.maEventProtector);
+        rEntity.maPendingEvents.push(rEntity.mpProducedEvents);
+        // the next call starts a fresh batch
+        rEntity.mpProducedEvents = 0;
+        // reclaim batches the consumer has already processed
+        deleteUsedEvents();
+        rEntity.maEventsPushed.set();
+    }
+}
+
+/** Replays one batch of queued parser events on the current entity.
+ *
+ *  START_ELEMENT, END_ELEMENT and CHARACTERS events are forwarded to the
+ *  matching Entity method. DONE marks the end of parsing. EXCEPTION reports
+ *  the saved parser error to the error handler (if any) and throws.
+ *
+ *  @param pEventList batch to process. On normal return it is parked on
+ *                    maUsedEvents for deleteUsedEvents(); on the EXCEPTION
+ *                    path it is deleted here, because that path never
+ *                    reaches the hand-off.
+ *  @return false once a DONE event has been seen, true otherwise.
+ *  @throws SAXParseException when the batch contains an EXCEPTION event.
+ */
+bool FastSaxParser::consume(EventList *pEventList)
+{
+    Entity& rEntity = getEntity();
+    bool bIsParserFinished = false;
+    for (EventList::iterator aEventIt = pEventList->begin();
+         aEventIt != pEventList->end(); ++aEventIt)
+    {
+        switch ((*aEventIt).maType)
+        {
+            case CallbackType::START_ELEMENT:
+                rEntity.startElement( &(*aEventIt) );
+                break;
+            case CallbackType::END_ELEMENT:
+                rEntity.endElement();
+                break;
+            case CallbackType::CHARACTERS:
+                rEntity.characters( (*aEventIt).msChars.get() );
+                break;
+            case CallbackType::DONE:
+                bIsParserFinished = true;
+                // DONE must be the last event of its batch
+                assert(aEventIt+1 == pEventList->end());
+                break;
+            case CallbackType::EXCEPTION:
+            {
+                assert( rEntity.maSavedException.hasValue() );
+                // Error during parsing !
+                XML_Error xmlE = XML_GetErrorCode( rEntity.mpParser );
+                OUString sSystemId = mxDocumentLocator->getSystemId();
+                sal_Int32 nLine = mxDocumentLocator->getLineNumber();
+
+                SAXParseException aExcept(
+                    lclGetErrorMessage( xmlE, sSystemId, nLine ),
+                    Reference< XInterface >(),
+                    Any( &rEntity.maSavedException, getCppuType( &rEntity.maSavedException ) ),
+                    mxDocumentLocator->getPublicId(),
+                    mxDocumentLocator->getSystemId(),
+                    mxDocumentLocator->getLineNumber(),
+                    mxDocumentLocator->getColumnNumber()
+                );
+                // The caller has already removed this batch from
+                // maPendingEvents and we are about to leave by exception,
+                // so free it here; nothing below touches the batch again.
+                delete pEventList;
+
+                // error handler is set, it may throw the exception
+                if( rEntity.mxErrorHandler.is() )
+                    rEntity.mxErrorHandler->fatalError( Any( aExcept ) );
+
+                throw aExcept;
+            }
+        }
+    }
+    rEntity.maUsedEvents.push(pEventList);
+    return !bIsParserFinished;
+}
+
// starts parsing with actual parser !
void FastSaxParser::parse()
{
@@ -720,6 +874,7 @@ void FastSaxParser::parse()
}
}
while( nRead > 0 );
+ produce(Event( CallbackType::DONE ));
}
//------------------------------------------
@@ -741,14 +896,13 @@ void FastSaxParser::callbackStartElement( const XML_Char* pwName, const XML_Char
rEntity.maNamespaceCount.push( rEntity.maNamespaceCount.top() );
}
- rEntity.mxAttributes->clear();
-
// create attribute map and process namespace instructions
sal_Int32 nNameLen, nPrefixLen;
const XML_Char *pName;
const XML_Char *pPrefix;
OUString sNamespace;
sal_Int32 nNamespaceToken = FastToken::DONTKNOW;
+ FastAttributeList *pAttributes = new FastAttributeList( rEntity.mxTokenHandler );
if (!rEntity.maNamespaceStack.empty())
{
sNamespace = rEntity.maNamespaceStack.top().msName;
@@ -796,9 +950,9 @@ void FastSaxParser::callbackStartElement( const XML_Char* pwName, const XML_Char
{
sal_Int32 nAttributeToken = GetTokenWithPrefix( pPrefix, nPrefixLen, pName, nNameLen );
if( nAttributeToken != FastToken::DONTKNOW )
- rEntity.mxAttributes->add( nAttributeToken, awAttributes[i+1] );
+ pAttributes->add( nAttributeToken, awAttributes[i+1] );
else
- rEntity.mxAttributes->addUnknown( GetNamespaceURL( pPrefix, nPrefixLen ),
+ pAttributes->addUnknown( GetNamespaceURL( pPrefix, nPrefixLen ),
OString(pName, nNameLen), awAttributes[i+1] );
}
}
@@ -808,9 +962,9 @@ void FastSaxParser::callbackStartElement( const XML_Char* pwName, const XML_Char
{
sal_Int32 nAttributeToken = GetToken( pName, nNameLen );
if( nAttributeToken != FastToken::DONTKNOW )
- rEntity.mxAttributes->add( nAttributeToken, awAttributes[i+1] );
+ pAttributes->add( nAttributeToken, awAttributes[i+1] );
else
- rEntity.mxAttributes->addUnknown( OString(pName, nNameLen), awAttributes[i+1] );
+ pAttributes->addUnknown( OString(pName, nNameLen), awAttributes[i+1] );
}
}
}
@@ -832,8 +986,8 @@ void FastSaxParser::callbackStartElement( const XML_Char* pwName, const XML_Char
}
rEntity.maNamespaceStack.push( NameWithToken(sNamespace, nNamespaceToken) );
- rEntity.startElement( nElementToken, sNamespace,
- OUString(pName, nNameLen, RTL_TEXTENCODING_UTF8), rEntity.mxAttributes.get() );
+ produce(Event( CallbackType::START_ELEMENT, nElementToken, sNamespace,
+ OUString(pName, nNameLen, RTL_TEXTENCODING_UTF8), pAttributes ));
}
catch (const Exception& e)
{
@@ -852,13 +1006,13 @@ void FastSaxParser::callbackEndElement( SAL_UNUSED_PARAMETER const XML_Char* )
if( !rEntity.maNamespaceStack.empty() )
rEntity.maNamespaceStack.pop();
- rEntity.endElement();
+ produce(Event( CallbackType::END_ELEMENT ));
}
void FastSaxParser::callbackCharacters( const XML_Char* s, int nLen )
{
- getEntity().characters( OUString( s, nLen, RTL_TEXTENCODING_UTF8 ) );
+ produce(Event( CallbackType::CHARACTERS, OUString(s, nLen, RTL_TEXTENCODING_UTF8) ));
}
void FastSaxParser::callbackEntityDecl(
diff --git a/sax/source/fastparser/fastparser.hxx b/sax/source/fastparser/fastparser.hxx
index 4d31212..933ec9e 100644
--- a/sax/source/fastparser/fastparser.hxx
+++ b/sax/source/fastparser/fastparser.hxx
@@ -20,11 +20,13 @@
#ifndef _SAX_FASTPARSER_HXX_
#define _SAX_FASTPARSER_HXX_
+#include <queue>
#include <vector>
#include <stack>
#include <boost/optional.hpp>
#include <boost/shared_ptr.hpp>
#include <boost/unordered_map.hpp>
+#include <osl/conditn.hxx>
#include <rtl/ref.hxx>
#include <com/sun/star/xml/sax/XFastContextHandler.hpp>
#include <com/sun/star/xml/sax/XFastDocumentHandler.hpp>
@@ -43,6 +45,7 @@
namespace sax_fastparser {
+struct Event;
class FastLocatorImpl;
struct NamespaceDefine;
@@ -59,6 +62,24 @@ struct NameWithToken
msName(sName), mnToken(nToken) {}
};
+typedef std::vector<Event> EventList;
+
+enum CallbackType { START_ELEMENT, END_ELEMENT, CHARACTERS, DONE, EXCEPTION };
+
+// One queued parser callback. produce() stores these in an EventList and
+// consume() replays them; maType selects which callback is replayed and
+// therefore which of the optional payload members are meaningful.
+struct Event {
+ // text payload, read by consume() for CHARACTERS events
+ boost::optional< OUString > msChars;
+ // the four members below are filled by the element constructor and read
+ // by Entity::startElement()
+ boost::optional< sal_Int32 > mnElementToken;
+ boost::optional< OUString > maNamespace;
+ boost::optional< OUString > maElementName;
+ boost::optional< rtl::Reference< FastAttributeList > > mpAttributes;
+ // kind of callback this event stands for
+ CallbackType maType;
+ // event carrying only a type
+ Event(const CallbackType& t);
+ // event carrying character data
+ Event(const CallbackType& t, const OUString& sChars);
+ // event carrying element token, namespace, name and attribute list
+ Event(const CallbackType& t, sal_Int32 nElementToken, const OUString& aNamespace,
+ const OUString& aElementName, FastAttributeList *pAttributes);
+ ~Event();
+};
+
// --------------------------------------------------------------------
struct SaxContext
@@ -86,13 +107,24 @@ struct ParserData
// --------------------------------------------------------------------
-// Entity binds all information needed for a single file
+// Entity binds all information needed for a single file | single call of parseStream
struct Entity : public ParserData
{
+ // Amount of work producer sends to consumer in one iteration:
+ static const size_t mnEventListSize = 1000;
+ // unique for each Entity instance:
+
+ EventList *mpProducedEvents;
+ std::queue< EventList * > maPendingEvents;
+ std::queue< EventList * > maUsedEvents;
+ osl::Mutex maEventProtector;
+ osl::Condition maEventsPushed;
+
+ // copied in copy constructor:
+
::com::sun::star::xml::sax::InputSource maStructSource;
XML_Parser mpParser;
::sax_expatwrap::XMLFile2UTFConverter maConverter;
- ::rtl::Reference< FastAttributeList > mxAttributes;
// Exceptions cannot be thrown through the C-XmlParser (possible resource leaks),
// therefore the exception must be saved somewhere.
@@ -108,9 +140,9 @@ struct Entity : public ParserData
::std::vector< NamespaceDefineRef > maNamespaceDefines;
explicit Entity( const ParserData& rData );
+ Entity( const Entity& rEntity );
~Entity();
- void startElement( sal_Int32 nElementToken, const OUString& aNamespace,
- const OUString& aElementName, FastAttributeList *pAttributes );
+ void startElement( Event *pEvent );
void characters( const OUString& sChars );
void endElement();
};
@@ -155,9 +187,12 @@ public:
inline void pushEntity( const Entity& rEntity ) { maEntities.push( rEntity ); }
inline void popEntity() { maEntities.pop(); }
Entity& getEntity() { return maEntities.top(); }
+ void parse();
+ void produce( const Event& );
private:
- void parse();
+ bool consume(EventList *);
+ void deleteUsedEvents();
sal_Int32 GetToken( const sal_Char* pToken, sal_Int32 nTokenLen = 0 );
sal_Int32 GetTokenWithPrefix( const sal_Char*pPrefix, int nPrefixLen, const sal_Char* pName, int nNameLen ) throw (::com::sun::star::xml::sax::SAXException);
@@ -174,8 +209,7 @@ private:
void splitName( const XML_Char *pwName, const XML_Char *&rpPrefix, sal_Int32 &rPrefixLen, const XML_Char *&rpName, sal_Int32 &rNameLen );
private:
- ::osl::Mutex maMutex;
-
+ osl::Mutex maMutex; ///< Protecting whole parseStream() execution
::rtl::Reference< FastLocatorImpl > mxDocumentLocator;
NamespaceMap maNamespaceMap;
commit 9d371b474355746205045e366f9407f6a1b43e30
Author: Michael Meeks <michael.meeks at collabora.com>
Date: Fri Oct 11 21:46:45 2013 +0100
fastparser: cache default namespace token for ooxml.
Change-Id: Iee98ec92380d6d0404ab236e062ddbc2378cda43
diff --git a/sax/source/fastparser/fastparser.cxx b/sax/source/fastparser/fastparser.cxx
index 9a31cc8..825160f 100644
--- a/sax/source/fastparser/fastparser.cxx
+++ b/sax/source/fastparser/fastparser.cxx
@@ -420,10 +420,8 @@ OUString FastSaxParser::GetNamespaceURL( const sal_Char*pPrefix, int nPrefixLen
// --------------------------------------------------------------------
-sal_Int32 FastSaxParser::GetTokenWithNamespaceURL( const OUString& rNamespaceURL, const sal_Char* pName, int nNameLen )
+sal_Int32 FastSaxParser::GetTokenWithContextNamespace( sal_Int32 nNamespaceToken, const sal_Char* pName, int nNameLen )
{
- sal_Int32 nNamespaceToken = GetNamespaceToken( rNamespaceURL );
-
if( nNamespaceToken != FastToken::DONTKNOW )
{
sal_Int32 nNameToken = GetToken( pName, nNameLen );
@@ -749,9 +747,13 @@ void FastSaxParser::callbackStartElement( const XML_Char* pwName, const XML_Char
sal_Int32 nNameLen, nPrefixLen;
const XML_Char *pName;
const XML_Char *pPrefix;
- OUString aNamespace;
+ OUString sNamespace;
+ sal_Int32 nNamespaceToken = FastToken::DONTKNOW;
if (!rEntity.maNamespaceStack.empty())
- aNamespace = rEntity.maNamespaceStack.top();
+ {
+ sNamespace = rEntity.maNamespaceStack.top().msName;
+ nNamespaceToken = rEntity.maNamespaceStack.top().mnToken;
+ }
try
{
@@ -777,8 +779,9 @@ void FastSaxParser::callbackStartElement( const XML_Char* pwName, const XML_Char
{
if( (nNameLen == 5) && (strcmp( pName, "xmlns" ) == 0) )
{
- // namespace of the element found
- aNamespace = OUString( awAttributes[i+1], strlen( awAttributes[i+1] ), RTL_TEXTENCODING_UTF8 );
+ // default namespace is the attribute value
+ sNamespace = OUString( awAttributes[i+1], strlen( awAttributes[i+1] ), RTL_TEXTENCODING_UTF8 );
+ nNamespaceToken = GetNamespaceToken( sNamespace );
}
}
}
@@ -816,17 +819,20 @@ void FastSaxParser::callbackStartElement( const XML_Char* pwName, const XML_Char
splitName( pwName, pPrefix, nPrefixLen, pName, nNameLen );
if( nPrefixLen > 0 )
nElementToken = GetTokenWithPrefix( pPrefix, nPrefixLen, pName, nNameLen );
- else if( !aNamespace.isEmpty() )
- nElementToken = GetTokenWithNamespaceURL( aNamespace, pName, nNameLen );
+ else if( !sNamespace.isEmpty() )
+ nElementToken = GetTokenWithContextNamespace( nNamespaceToken, pName, nNameLen );
else
nElementToken = GetToken( pName );
if( nElementToken == FastToken::DONTKNOW )
if( nPrefixLen > 0 )
- aNamespace = GetNamespaceURL( pPrefix, nPrefixLen );
+ {
+ sNamespace = GetNamespaceURL( pPrefix, nPrefixLen );
+ nNamespaceToken = GetNamespaceToken( sNamespace );
+ }
- rEntity.maNamespaceStack.push(aNamespace);
- rEntity.startElement( nElementToken, aNamespace,
+ rEntity.maNamespaceStack.push( NameWithToken(sNamespace, nNamespaceToken) );
+ rEntity.startElement( nElementToken, sNamespace,
OUString(pName, nNameLen, RTL_TEXTENCODING_UTF8), rEntity.mxAttributes.get() );
}
catch (const Exception& e)
diff --git a/sax/source/fastparser/fastparser.hxx b/sax/source/fastparser/fastparser.hxx
index 3e6398b..4d31212 100644
--- a/sax/source/fastparser/fastparser.hxx
+++ b/sax/source/fastparser/fastparser.hxx
@@ -51,6 +51,14 @@ typedef ::boost::shared_ptr< NamespaceDefine > NamespaceDefineRef;
typedef ::boost::unordered_map< OUString, sal_Int32,
OUStringHash, ::std::equal_to< OUString > > NamespaceMap;
+// A namespace name together with its namespace token, kept as one entry on
+// the namespace stack so the token travels with the name.
+struct NameWithToken
+{
+ // namespace name (URL)
+ OUString msName;
+ // token for msName; callers initialise it to FastToken::DONTKNOW when unknown
+ sal_Int32 mnToken;
+ NameWithToken(const OUString& sName, const sal_Int32& nToken):
+ msName(sName), mnToken(nToken) {}
+};
+
// --------------------------------------------------------------------
struct SaxContext
@@ -90,7 +98,7 @@ struct Entity : public ParserData
// therefore the exception must be saved somewhere.
::com::sun::star::uno::Any maSavedException;
- ::std::stack< OUString > maNamespaceStack;
+ ::std::stack< NameWithToken > maNamespaceStack;
/* Context for main thread consuming events.
* startElement() stores the data, which characters() and endElement() uses
*/
@@ -138,7 +146,7 @@ public:
void callbackStartElement( const XML_Char* name, const XML_Char** atts );
void callbackEndElement( const XML_Char* name );
void callbackCharacters( const XML_Char* s, int nLen );
- int callbackExternalEntityRef( XML_Parser parser, const XML_Char *openEntityNames, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId);
+ int callbackExternalEntityRef( XML_Parser parser, const XML_Char *openEntityNames, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId);
void callbackEntityDecl(const XML_Char *entityName, int is_parameter_entity,
const XML_Char *value, int value_length, const XML_Char *base,
const XML_Char *systemId, const XML_Char *publicId,
@@ -156,7 +164,7 @@ private:
OUString GetNamespaceURL( const OString& rPrefix ) throw (::com::sun::star::xml::sax::SAXException);
OUString GetNamespaceURL( const sal_Char*pPrefix, int nPrefixLen ) throw (::com::sun::star::xml::sax::SAXException);
sal_Int32 GetNamespaceToken( const OUString& rNamespaceURL );
- sal_Int32 GetTokenWithNamespaceURL( const OUString& rNamespaceURL, const sal_Char* pName, int nNameLen );
+ sal_Int32 GetTokenWithContextNamespace( sal_Int32 nNamespaceToken, const sal_Char* pName, int nNameLen );
void DefineNamespace( const OString& rPrefix, const sal_Char* pNamespaceURL );
sal_Int32 CreateCustomToken( const sal_Char* pToken, int len = 0 );
commit b6271e50191c3a72cd8f1e486b78cb87c4ba8292
Author: Matúš Kukan <matus.kukan at gmail.com>
Date: Wed Oct 16 11:15:31 2013 +0200
FastAttributeList: avoid OStrings in attribute list; just use char buffer
Change-Id: I4879563fae3b85c68bbd1c4b260f9833848f4bda
diff --git a/include/sax/fastattribs.hxx b/include/sax/fastattribs.hxx
index 88e9d61..238b7d4 100644
--- a/include/sax/fastattribs.hxx
+++ b/include/sax/fastattribs.hxx
@@ -40,9 +40,9 @@ struct UnknownAttribute
OString maName;
OString maValue;
- UnknownAttribute( const OUString& rNamespaceURL, const OString& rName, const OString& rValue );
+ UnknownAttribute( const OUString& rNamespaceURL, const OString& rName, const sal_Char* pValue );
- UnknownAttribute( const OString& rName, const OString& rValue );
+ UnknownAttribute( const OString& rName, const sal_Char* pValue );
void FillAttribute( ::com::sun::star::xml::Attribute* pAttrib ) const;
};
@@ -56,9 +56,10 @@ public:
virtual ~FastAttributeList();
void clear();
+ void add( sal_Int32 nToken, const sal_Char* pValue, size_t nValueLength = 0 );
void add( sal_Int32 nToken, const OString& rValue );
- void addUnknown( const OUString& rNamespaceURL, const OString& rName, const OString& rValue );
- void addUnknown( const OString& rName, const OString& rValue );
+ void addUnknown( const OUString& rNamespaceURL, const OString& rName, const sal_Char* pValue );
+ void addUnknown( const OString& rName, const sal_Char* pValue );
// XFastAttributeList
virtual ::sal_Bool SAL_CALL hasAttribute( ::sal_Int32 Token ) throw (::com::sun::star::uno::RuntimeException);
@@ -70,11 +71,18 @@ public:
virtual ::com::sun::star::uno::Sequence< ::com::sun::star::xml::FastAttribute > SAL_CALL getFastAttributes() throw (::com::sun::star::uno::RuntimeException);
private:
+ inline sal_Int32 AttributeValueLength(sal_Int32 i);
+
+private:
+ sal_Char *mpChunk; ///< buffer to store all attribute values - null terminated strings
+ sal_Int32 mnChunkLength; ///< size of allocated memory for mpChunk
+ // maAttributeValues stores pointers, relative to mpChunk, for each attribute value string
+ // length of the string is maAttributeValues[n+1] - maAttributeValues[n] - 1
+ // maAttributeValues[0] == 0
+ std::vector< sal_Int32 > maAttributeValues;
std::vector< sal_Int32 > maAttributeTokens;
- std::vector< OString > maAttributeValues;
UnknownAttributeList maUnknownAttributes;
::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XFastTokenHandler > mxTokenHandler;
-
};
}
diff --git a/sax/source/fastparser/fastparser.cxx b/sax/source/fastparser/fastparser.cxx
index 2387d23..9a31cc8 100644
--- a/sax/source/fastparser/fastparser.cxx
+++ b/sax/source/fastparser/fastparser.cxx
@@ -793,10 +793,10 @@ void FastSaxParser::callbackStartElement( const XML_Char* pwName, const XML_Char
{
sal_Int32 nAttributeToken = GetTokenWithPrefix( pPrefix, nPrefixLen, pName, nNameLen );
if( nAttributeToken != FastToken::DONTKNOW )
- rEntity.mxAttributes->add( nAttributeToken, OString(awAttributes[i+1]) );
+ rEntity.mxAttributes->add( nAttributeToken, awAttributes[i+1] );
else
rEntity.mxAttributes->addUnknown( GetNamespaceURL( pPrefix, nPrefixLen ),
- OString(pName, nNameLen), OString(awAttributes[i+1]) );
+ OString(pName, nNameLen), awAttributes[i+1] );
}
}
else
@@ -805,9 +805,9 @@ void FastSaxParser::callbackStartElement( const XML_Char* pwName, const XML_Char
{
sal_Int32 nAttributeToken = GetToken( pName, nNameLen );
if( nAttributeToken != FastToken::DONTKNOW )
- rEntity.mxAttributes->add( nAttributeToken, OString(awAttributes[i+1]) );
+ rEntity.mxAttributes->add( nAttributeToken, awAttributes[i+1] );
else
- rEntity.mxAttributes->addUnknown( OString(pName, nNameLen), OString(awAttributes[i+1]) );
+ rEntity.mxAttributes->addUnknown( OString(pName, nNameLen), awAttributes[i+1] );
}
}
}
diff --git a/sax/source/tools/fastattribs.cxx b/sax/source/tools/fastattribs.cxx
index b25ff08..77f0f29 100644
--- a/sax/source/tools/fastattribs.cxx
+++ b/sax/source/tools/fastattribs.cxx
@@ -27,13 +27,13 @@ using namespace ::com::sun::star::xml::sax;
namespace sax_fastparser
{
-UnknownAttribute::UnknownAttribute( const OUString& rNamespaceURL, const OString& rName, const OString& rValue )
- : maNamespaceURL( rNamespaceURL ), maName( rName ), maValue( rValue )
+UnknownAttribute::UnknownAttribute( const OUString& rNamespaceURL, const OString& rName, const sal_Char* pValue )
+ : maNamespaceURL( rNamespaceURL ), maName( rName ), maValue( pValue )
{
}
-UnknownAttribute::UnknownAttribute( const OString& rName, const OString& rValue )
- : maName( rName ), maValue( rValue )
+UnknownAttribute::UnknownAttribute( const OString& rName, const sal_Char* pValue )
+ : maName( rName ), maValue( pValue )
{
}
@@ -50,33 +50,54 @@ void UnknownAttribute::FillAttribute( Attribute* pAttrib ) const
FastAttributeList::FastAttributeList( const ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XFastTokenHandler >& xTokenHandler )
: mxTokenHandler( xTokenHandler )
{
+ // random initial size of buffer to store attribute values
+ mnChunkLength = 248;
+ mpChunk = (sal_Char *) malloc( mnChunkLength );
+ maAttributeValues.push_back( 0 );
}
FastAttributeList::~FastAttributeList()
{
+ free( mpChunk );
}
void FastAttributeList::clear()
{
maAttributeTokens.clear();
maAttributeValues.clear();
+ maAttributeValues.push_back( 0 );
maUnknownAttributes.clear();
}
-void FastAttributeList::add( sal_Int32 nToken, const OString& rValue )
+void FastAttributeList::add( sal_Int32 nToken, const sal_Char* pValue, size_t nValueLength )
{
maAttributeTokens.push_back( nToken );
- maAttributeValues.push_back( rValue );
+ if (nValueLength == 0)
+ nValueLength = strlen(pValue);
+ sal_Int32 nWritePosition = maAttributeValues.back();
+ maAttributeValues.push_back( maAttributeValues.back() + nValueLength + 1 );
+ if (maAttributeValues.back() > mnChunkLength)
+ {
+ mnChunkLength = maAttributeValues.back();
+ mpChunk = (sal_Char *) realloc( mpChunk, mnChunkLength );
+ }
+ strncpy(mpChunk + nWritePosition, pValue, nValueLength);
+ mpChunk[nWritePosition + nValueLength] = '\0';
+}
+
+void FastAttributeList::add( sal_Int32 nToken, const OString& rValue )
+{
+ add( nToken, rValue.getStr(), rValue.getLength() );
}
-void FastAttributeList::addUnknown( const OUString& rNamespaceURL, const OString& rName, const OString& rValue )
+void FastAttributeList::addUnknown( const OUString& rNamespaceURL, const OString& rName, const sal_Char* pValue )
{
- maUnknownAttributes.push_back( UnknownAttribute( rNamespaceURL, rName, rValue ) );
+ maUnknownAttributes.push_back( UnknownAttribute( rNamespaceURL, rName, pValue ) );
}
-void FastAttributeList::addUnknown( const OString& rName, const OString& rValue )
+void FastAttributeList::addUnknown( const OString& rName, const sal_Char* pValue )
{
- maUnknownAttributes.push_back( UnknownAttribute( rName, rValue ) );
+ maUnknownAttributes.push_back( UnknownAttribute( rName, pValue ) );
}
// XFastAttributeList
@@ -94,7 +115,7 @@ sal_Int32 FastAttributeList::getValueToken( ::sal_Int32 Token ) throw (SAXExcept
for (size_t i = 0; i < maAttributeTokens.size(); ++i)
if (maAttributeTokens[i] == Token)
{
- Sequence< sal_Int8 > aSeq( (sal_Int8*) maAttributeValues[i].getStr(), maAttributeValues[i].getLength() );
+ Sequence< sal_Int8 > aSeq( (sal_Int8*) mpChunk + maAttributeValues[i], AttributeValueLength(i) );
return mxTokenHandler->getTokenFromUTF8( aSeq );
}
@@ -106,7 +127,7 @@ sal_Int32 FastAttributeList::getOptionalValueToken( ::sal_Int32 Token, ::sal_Int
for (size_t i = 0; i < maAttributeTokens.size(); ++i)
if (maAttributeTokens[i] == Token)
{
- Sequence< sal_Int8 > aSeq( (sal_Int8*) maAttributeValues[i].getStr(), maAttributeValues[i].getLength() );
+ Sequence< sal_Int8 > aSeq( (sal_Int8*) mpChunk + maAttributeValues[i], AttributeValueLength(i) );
return mxTokenHandler->getTokenFromUTF8( aSeq );
}
@@ -117,7 +138,7 @@ OUString FastAttributeList::getValue( ::sal_Int32 Token ) throw (SAXException, R
{
for (size_t i = 0; i < maAttributeTokens.size(); ++i)
if (maAttributeTokens[i] == Token)
- return OStringToOUString( maAttributeValues[i], RTL_TEXTENCODING_UTF8 );
+ return OUString( mpChunk + maAttributeValues[i], AttributeValueLength(i), RTL_TEXTENCODING_UTF8 );
throw SAXException();
}
@@ -126,7 +147,7 @@ OUString FastAttributeList::getOptionalValue( ::sal_Int32 Token ) throw (Runtime
{
for (size_t i = 0; i < maAttributeTokens.size(); ++i)
if (maAttributeTokens[i] == Token)
- return OStringToOUString( maAttributeValues[i], RTL_TEXTENCODING_UTF8 );
+ return OUString( mpChunk + maAttributeValues[i], AttributeValueLength(i), RTL_TEXTENCODING_UTF8 );
return OUString();
}
@@ -145,12 +166,18 @@ Sequence< FastAttribute > FastAttributeList::getFastAttributes( ) throw (Runtim
for (size_t i = 0; i < maAttributeTokens.size(); ++i)
{
pAttr->Token = maAttributeTokens[i];
- pAttr->Value = OStringToOUString( maAttributeValues[i], RTL_TEXTENCODING_UTF8 );
+ pAttr->Value = OUString( mpChunk + maAttributeValues[i], AttributeValueLength(i), RTL_TEXTENCODING_UTF8 );
pAttr++;
}
return aSeq;
}
+/** Length, excluding the terminating NUL, of the i-th attribute value.
+ *
+ *  maAttributeValues holds the start offset of every value inside mpChunk
+ *  plus one trailing entry, so consecutive offsets bracket one
+ *  NUL-terminated string.
+ *
+ *  @param i index of the attribute value
+ *  @return number of characters in that value
+ */
+sal_Int32 FastAttributeList::AttributeValueLength(sal_Int32 i)
+{
+    const sal_Int32 nStart = maAttributeValues[i];
+    const sal_Int32 nNextStart = maAttributeValues[i + 1];
+    // the gap between two offsets includes the terminator
+    return nNextStart - nStart - 1;
+}
+
}
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
commit 9ef431132481663226e8477c2e1c0c2b76ffaf6b
Author: Matúš Kukan <matus.kukan at gmail.com>
Date: Tue Oct 15 09:29:27 2013 +0200
FastAttributeList: use vectors instead of map; the size is small
This is also preparation to avoid OString internal usage.
Change-Id: If0ea36155d8ab3f5c91c2aafd6932fabeadadd41
diff --git a/include/sax/fastattribs.hxx b/include/sax/fastattribs.hxx
index f31be4b..88e9d61 100644
--- a/include/sax/fastattribs.hxx
+++ b/include/sax/fastattribs.hxx
@@ -47,7 +47,6 @@ struct UnknownAttribute
void FillAttribute( ::com::sun::star::xml::Attribute* pAttrib ) const;
};
-typedef std::map< sal_Int32, OString > FastAttributeMap;
typedef std::vector< UnknownAttribute > UnknownAttributeList;
class SAX_DLLPUBLIC FastAttributeList : public ::cppu::WeakImplHelper1< ::com::sun::star::xml::sax::XFastAttributeList >
@@ -71,9 +70,9 @@ public:
virtual ::com::sun::star::uno::Sequence< ::com::sun::star::xml::FastAttribute > SAL_CALL getFastAttributes() throw (::com::sun::star::uno::RuntimeException);
private:
- FastAttributeMap maAttributes;
+ std::vector< sal_Int32 > maAttributeTokens;
+ std::vector< OString > maAttributeValues;
UnknownAttributeList maUnknownAttributes;
- FastAttributeMap::iterator maLastIter;
::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XFastTokenHandler > mxTokenHandler;
};
diff --git a/sax/source/tools/fastattribs.cxx b/sax/source/tools/fastattribs.cxx
index bb348a4..b25ff08 100644
--- a/sax/source/tools/fastattribs.cxx
+++ b/sax/source/tools/fastattribs.cxx
@@ -50,7 +50,6 @@ void UnknownAttribute::FillAttribute( Attribute* pAttrib ) const
FastAttributeList::FastAttributeList( const ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XFastTokenHandler >& xTokenHandler )
: mxTokenHandler( xTokenHandler )
{
- maLastIter = maAttributes.end();
}
FastAttributeList::~FastAttributeList()
@@ -59,14 +58,15 @@ FastAttributeList::~FastAttributeList()
void FastAttributeList::clear()
{
- maAttributes.clear();
+ maAttributeTokens.clear();
+ maAttributeValues.clear();
maUnknownAttributes.clear();
- maLastIter = maAttributes.end();
}
void FastAttributeList::add( sal_Int32 nToken, const OString& rValue )
{
- maAttributes[nToken] = rValue;
+ maAttributeTokens.push_back( nToken );
+ maAttributeValues.push_back( rValue );
}
void FastAttributeList::addUnknown( const OUString& rNamespaceURL, const OString& rName, const OString& rValue )
@@ -82,55 +82,53 @@ void FastAttributeList::addUnknown( const OString& rName, const OString& rValue
// XFastAttributeList
sal_Bool FastAttributeList::hasAttribute( ::sal_Int32 Token ) throw (RuntimeException)
{
- maLastIter = maAttributes.find( Token );
- return ( maLastIter != maAttributes.end() ) ? sal_True : sal_False;
+ for (size_t i = 0; i < maAttributeTokens.size(); ++i)
+ if (maAttributeTokens[i] == Token)
+ return sal_True;
+
+ return sal_False;
}
sal_Int32 FastAttributeList::getValueToken( ::sal_Int32 Token ) throw (SAXException, RuntimeException)
{
- if( ( maLastIter == maAttributes.end() ) || ( ( *maLastIter ).first != Token ) )
- maLastIter = maAttributes.find( Token );
-
- if( maLastIter == maAttributes.end() )
- throw SAXException();
+ for (size_t i = 0; i < maAttributeTokens.size(); ++i)
+ if (maAttributeTokens[i] == Token)
+ {
+ Sequence< sal_Int8 > aSeq( (sal_Int8*) maAttributeValues[i].getStr(), maAttributeValues[i].getLength() );
+ return mxTokenHandler->getTokenFromUTF8( aSeq );
+ }
- Sequence< sal_Int8 > aSeq( (sal_Int8*)(*maLastIter).second.getStr(), (*maLastIter).second.getLength() ) ;
- return mxTokenHandler->getTokenFromUTF8( aSeq );
+ throw SAXException();
}
sal_Int32 FastAttributeList::getOptionalValueToken( ::sal_Int32 Token, ::sal_Int32 Default ) throw (RuntimeException)
{
- if( ( maLastIter == maAttributes.end() ) || ( ( *maLastIter ).first != Token ) )
- maLastIter = maAttributes.find( Token );
-
- if( maLastIter == maAttributes.end() )
- return Default;
+ for (size_t i = 0; i < maAttributeTokens.size(); ++i)
+ if (maAttributeTokens[i] == Token)
+ {
+ Sequence< sal_Int8 > aSeq( (sal_Int8*) maAttributeValues[i].getStr(), maAttributeValues[i].getLength() );
+ return mxTokenHandler->getTokenFromUTF8( aSeq );
+ }
- Sequence< sal_Int8 > aSeq( (sal_Int8*)(*maLastIter).second.getStr(), (*maLastIter).second.getLength() ) ;
- return mxTokenHandler->getTokenFromUTF8( aSeq );
+ return Default;
}
OUString FastAttributeList::getValue( ::sal_Int32 Token ) throw (SAXException, RuntimeException)
{
- if( ( maLastIter == maAttributes.end() ) || ( ( *maLastIter ).first != Token ) )
- maLastIter = maAttributes.find( Token );
+ for (size_t i = 0; i < maAttributeTokens.size(); ++i)
+ if (maAttributeTokens[i] == Token)
+ return OStringToOUString( maAttributeValues[i], RTL_TEXTENCODING_UTF8 );
- if( maLastIter == maAttributes.end() )
- throw SAXException();
-
- return OStringToOUString( (*maLastIter).second, RTL_TEXTENCODING_UTF8 );
+ throw SAXException();
}
OUString FastAttributeList::getOptionalValue( ::sal_Int32 Token ) throw (RuntimeException)
{
- if( ( maLastIter == maAttributes.end() ) || ( ( *maLastIter ).first != Token ) )
- maLastIter = maAttributes.find( Token );
-
- OUString aRet;
- if( maLastIter != maAttributes.end() )
- aRet = OStringToOUString( (*maLastIter).second, RTL_TEXTENCODING_UTF8 );
+ for (size_t i = 0; i < maAttributeTokens.size(); ++i)
+ if (maAttributeTokens[i] == Token)
+ return OStringToOUString( maAttributeValues[i], RTL_TEXTENCODING_UTF8 );
- return aRet;
+ return OUString();
}
Sequence< Attribute > FastAttributeList::getUnknownAttributes( ) throw (RuntimeException)
{
@@ -142,13 +140,12 @@ Sequence< Attribute > FastAttributeList::getUnknownAttributes( ) throw (Runtime
}
Sequence< FastAttribute > FastAttributeList::getFastAttributes( ) throw (RuntimeException)
{
- Sequence< FastAttribute > aSeq( maAttributes.size() );
+ Sequence< FastAttribute > aSeq( maAttributeTokens.size() );
FastAttribute* pAttr = aSeq.getArray();
- FastAttributeMap::iterator fastAttrIter = maAttributes.begin();
- for(; fastAttrIter != maAttributes.end(); ++fastAttrIter )
+ for (size_t i = 0; i < maAttributeTokens.size(); ++i)
{
- pAttr->Token = fastAttrIter->first;
- pAttr->Value = OStringToOUString( fastAttrIter->second, RTL_TEXTENCODING_UTF8 );
+ pAttr->Token = maAttributeTokens[i];
+ pAttr->Value = OStringToOUString( maAttributeValues[i], RTL_TEXTENCODING_UTF8 );
pAttr++;
}
return aSeq;
commit 132a8b9158e9e97f3fd4f7fc423a4e51f69ecf95
Author: Matúš Kukan <matus.kukan at gmail.com>
Date: Fri Oct 11 13:11:44 2013 +0200
fastparser: remove duplicated OString version methods
Also remove AttributeData and make the two for-cycles faster.
Change-Id: If0343992173bb333eee39c33bcb86acaa5a3d288
diff --git a/sax/source/fastparser/fastparser.cxx b/sax/source/fastparser/fastparser.cxx
index c67658a..2387d23 100644
--- a/sax/source/fastparser/fastparser.cxx
+++ b/sax/source/fastparser/fastparser.cxx
@@ -310,11 +310,6 @@ void FastSaxParser::DefineNamespace( const OString& rPrefix, const sal_Char* pNa
// --------------------------------------------------------------------
-sal_Int32 FastSaxParser::GetToken( const OString& rToken )
-{
- return GetToken( rToken.getStr(), rToken.getLength() );
-}
-
sal_Int32 FastSaxParser::GetToken( const sal_Char* pToken, sal_Int32 nLen /* = 0 */ )
{
sal_Int32 nRet;
@@ -346,34 +341,6 @@ sal_Int32 FastSaxParser::GetToken( const sal_Char* pToken, sal_Int32 nLen /* = 0
// --------------------------------------------------------------------
-sal_Int32 FastSaxParser::GetTokenWithPrefix( const OString& rPrefix, const OString& rName ) throw (SAXException)
-{
- sal_Int32 nNamespaceToken = FastToken::DONTKNOW;
-
- Entity& rEntity = getEntity();
- sal_uInt32 nNamespace = rEntity.maNamespaceCount.top();
- while( nNamespace-- )
- {
- if( rEntity.maNamespaceDefines[nNamespace]->maPrefix == rPrefix )
- {
- nNamespaceToken = rEntity.maNamespaceDefines[nNamespace]->mnToken;
- break;
- }
-
- if( !nNamespace )
- throw SAXException(); // prefix that has no defined namespace url
- }
-
- if( nNamespaceToken != FastToken::DONTKNOW )
- {
- sal_Int32 nNameToken = GetToken( rName.getStr(), rName.getLength() );
- if( nNameToken != FastToken::DONTKNOW )
- return nNamespaceToken | nNameToken;
- }
-
- return FastToken::DONTKNOW;
-}
-
sal_Int32 FastSaxParser::GetTokenWithPrefix( const sal_Char*pPrefix, int nPrefixLen, const sal_Char* pName, int nNameLen ) throw (SAXException)
{
sal_Int32 nNamespaceToken = FastToken::DONTKNOW;
@@ -763,17 +730,6 @@ void FastSaxParser::parse()
//
//-----------------------------------------
-namespace {
-
-struct AttributeData
-{
- OString maPrefix;
- OString maName;
- OString maValue;
-};
-
-} // namespace
-
void FastSaxParser::callbackStartElement( const XML_Char* pwName, const XML_Char** awAttributes )
{
Entity& rEntity = getEntity();
@@ -790,7 +746,6 @@ void FastSaxParser::callbackStartElement( const XML_Char* pwName, const XML_Char
rEntity.mxAttributes->clear();
// create attribute map and process namespace instructions
- int i = 0;
sal_Int32 nNameLen, nPrefixLen;
const XML_Char *pName;
const XML_Char *pPrefix;
@@ -804,10 +759,9 @@ void FastSaxParser::callbackStartElement( const XML_Char* pwName, const XML_Char
First, process all namespace attributes and cache other attributes in a
vector. Second, process the attributes after namespaces have been
initialized. */
- ::std::vector< AttributeData > aAttribs;
// #158414# first: get namespaces
- for( ; awAttributes[i]; i += 2 )
+ for (int i = 0; awAttributes[i]; i += 2)
{
assert(awAttributes[i+1]);
@@ -818,13 +772,6 @@ void FastSaxParser::callbackStartElement( const XML_Char* pwName, const XML_Char
{
DefineNamespace( OString( pName, nNameLen ), awAttributes[i+1] );
}
- else
- {
- aAttribs.resize( aAttribs.size() + 1 );
- aAttribs.back().maPrefix = OString( pPrefix, nPrefixLen );
- aAttribs.back().maName = OString( pName, nNameLen );
- aAttribs.back().maValue = OString( awAttributes[i+1] );
- }
}
else
{
@@ -833,33 +780,35 @@ void FastSaxParser::callbackStartElement( const XML_Char* pwName, const XML_Char
// namespace of the element found
aNamespace = OUString( awAttributes[i+1], strlen( awAttributes[i+1] ), RTL_TEXTENCODING_UTF8 );
}
- else
- {
- aAttribs.resize( aAttribs.size() + 1 );
- aAttribs.back().maName = OString( pName, nNameLen );
- aAttribs.back().maValue = OString( awAttributes[i+1] );
- }
}
}
// #158414# second: fill attribute list with other attributes
- for( ::std::vector< AttributeData >::const_iterator aIt = aAttribs.begin(), aEnd = aAttribs.end(); aIt != aEnd; ++aIt )
+ for (int i = 0; awAttributes[i]; i += 2)
{
- if( !aIt->maPrefix.isEmpty() )
+ splitName( awAttributes[i], pPrefix, nPrefixLen, pName, nNameLen );
+ if( nPrefixLen )
{
- sal_Int32 nAttributeToken = GetTokenWithPrefix( aIt->maPrefix, aIt->maName );
- if( nAttributeToken != FastToken::DONTKNOW )
- rEntity.mxAttributes->add( nAttributeToken, aIt->maValue );
- else
- rEntity.mxAttributes->addUnknown( GetNamespaceURL( aIt->maPrefix ), aIt->maName, aIt->maValue );
+ if( (nPrefixLen != 5) || (strncmp( pPrefix, "xmlns", 5 ) != 0) )
+ {
+ sal_Int32 nAttributeToken = GetTokenWithPrefix( pPrefix, nPrefixLen, pName, nNameLen );
+ if( nAttributeToken != FastToken::DONTKNOW )
+ rEntity.mxAttributes->add( nAttributeToken, OString(awAttributes[i+1]) );
+ else
+ rEntity.mxAttributes->addUnknown( GetNamespaceURL( pPrefix, nPrefixLen ),
+ OString(pName, nNameLen), OString(awAttributes[i+1]) );
+ }
}
else
{
- sal_Int32 nAttributeToken = GetToken( aIt->maName );
- if( nAttributeToken != FastToken::DONTKNOW )
- rEntity.mxAttributes->add( nAttributeToken, aIt->maValue );
- else
- rEntity.mxAttributes->addUnknown( aIt->maName, aIt->maValue );
+ if( (nNameLen != 5) || (strcmp( pName, "xmlns" ) != 0) )
+ {
+ sal_Int32 nAttributeToken = GetToken( pName, nNameLen );
+ if( nAttributeToken != FastToken::DONTKNOW )
+ rEntity.mxAttributes->add( nAttributeToken, OString(awAttributes[i+1]) );
+ else
+ rEntity.mxAttributes->addUnknown( OString(pName, nNameLen), OString(awAttributes[i+1]) );
+ }
}
}
diff --git a/sax/source/fastparser/fastparser.hxx b/sax/source/fastparser/fastparser.hxx
index 5d96645..3e6398b 100644
--- a/sax/source/fastparser/fastparser.hxx
+++ b/sax/source/fastparser/fastparser.hxx
@@ -151,9 +151,7 @@ public:
private:
void parse();
- sal_Int32 GetToken( const OString& rToken );
sal_Int32 GetToken( const sal_Char* pToken, sal_Int32 nTokenLen = 0 );
- sal_Int32 GetTokenWithPrefix( const OString& rPrefix, const OString& rName ) throw (::com::sun::star::xml::sax::SAXException);
sal_Int32 GetTokenWithPrefix( const sal_Char*pPrefix, int nPrefixLen, const sal_Char* pName, int nNameLen ) throw (::com::sun::star::xml::sax::SAXException);
OUString GetNamespaceURL( const OString& rPrefix ) throw (::com::sun::star::xml::sax::SAXException);
OUString GetNamespaceURL( const sal_Char*pPrefix, int nPrefixLen ) throw (::com::sun::star::xml::sax::SAXException);
commit 1ad87394a4ddd27be4419558791fdbef453bb650
Author: Matúš Kukan <matus.kukan at gmail.com>
Date: Tue Oct 8 15:17:01 2013 +0200
fastparser: isolate calls to XFastContextHandler in Entity's methods
Unfortunately, we have to store maNamespace in two stacks.
One for future parser thread to compute tokens and one for
main thread calling XFastContextHandler.
Now we are prepared to implement multithreading.
Change-Id: I421b55afa2e9ea80142e6068c7a515d31913ba69
diff --git a/sax/source/fastparser/fastparser.cxx b/sax/source/fastparser/fastparser.cxx
index 623638a..c67658a 100644
--- a/sax/source/fastparser/fastparser.cxx
+++ b/sax/source/fastparser/fastparser.cxx
@@ -23,7 +23,6 @@
#include <rtl/ustrbuf.hxx>
#include <com/sun/star/lang/DisposedException.hpp>
-#include <com/sun/star/xml/sax/XFastContextHandler.hpp>
#include <com/sun/star/xml/sax/SAXParseException.hpp>
#include <com/sun/star/xml/sax/FastToken.hpp>
@@ -41,18 +40,15 @@ using namespace ::com::sun::star::io;
namespace sax_fastparser {
-// --------------------------------------------------------------------
-
-struct SaxContextImpl
+SaxContext::SaxContext( sal_Int32 nElementToken, const OUString& aNamespace, const OUString& aElementName ):
+ mnElementToken(nElementToken)
{
- Reference< XFastContextHandler > mxContext;
- sal_Int32 mnElementToken;
- OUString maNamespace;
- OUString maElementName;
-
- SaxContextImpl() { mnElementToken = 0; }
- SaxContextImpl( const SaxContextImplPtr& p ) { mnElementToken = p->mnElementToken; maNamespace = p->maNamespace; }
-};
+ if (nElementToken == FastToken::DONTKNOW)
+ {
+ maNamespace = aNamespace;
+ maElementName = aElementName;
+ }
+}
// --------------------------------------------------------------------
@@ -193,52 +189,105 @@ Entity::~Entity()
{
}
-// --------------------------------------------------------------------
-// FastSaxParser implementation
-// --------------------------------------------------------------------
-
-FastSaxParser::FastSaxParser()
+void Entity::startElement( sal_Int32 nElementToken, const OUString& aNamespace,
+ const OUString& aElementName, FastAttributeList *pAttributes )
{
- mxDocumentLocator.set( new FastLocatorImpl( this ) );
- maUtf8Buffer.realloc( mnUtf8BufferSize );
-}
+ Reference< XFastContextHandler > xParentContext;
+ if( !maContextStack.empty() )
+ {
+ xParentContext = maContextStack.top().mxContext;
+ if (!xParentContext.is())
+ {
+ maContextStack.push( SaxContext(nElementToken, aNamespace, aElementName) );
+ return;
+ }
+ }
-// --------------------------------------------------------------------
+ maContextStack.push( SaxContext(nElementToken, aNamespace, aElementName) );
-FastSaxParser::~FastSaxParser()
-{
- if( mxDocumentLocator.is() )
- mxDocumentLocator->dispose();
+ try
+ {
+ Reference< XFastAttributeList > xAttr( pAttributes );
+ Reference< XFastContextHandler > xContext;
+ if( nElementToken == FastToken::DONTKNOW )
+ {
+ if( xParentContext.is() )
+ xContext = xParentContext->createUnknownChildContext( aNamespace, aElementName, xAttr );
+ else if( mxDocumentHandler.is() )
+ xContext = mxDocumentHandler->createUnknownChildContext( aNamespace, aElementName, xAttr );
+
+ if( xContext.is() )
+ {
+ xContext->startUnknownElement( aNamespace, aElementName, xAttr );
+ }
+ }
+ else
+ {
+ if( xParentContext.is() )
+ xContext = xParentContext->createFastChildContext( nElementToken, xAttr );
+ else if( mxDocumentHandler.is() )
+ xContext = mxDocumentHandler->createFastChildContext( nElementToken, xAttr );
+
+ if( xContext.is() )
+ {
+ xContext->startFastElement( nElementToken, xAttr );
+ }
+ }
+ maContextStack.top().mxContext = xContext;
+ }
+ catch (const Exception& e)
+ {
+ maSavedException <<= e;
+ }
}
-// --------------------------------------------------------------------
+void Entity::characters( const OUString& sChars )
+{
+ const Reference< XFastContextHandler >& xContext( maContextStack.top().mxContext );
+ if( xContext.is() ) try
+ {
+ xContext->characters( sChars );
+ }
+ catch (const Exception& e)
+ {
+ maSavedException <<= e;
+ }
+}
-void FastSaxParser::pushContext()
+void Entity::endElement()
{
- Entity& rEntity = getEntity();
- if( rEntity.maContextStack.empty() )
+ const SaxContext& aContext = maContextStack.top();
+ const Reference< XFastContextHandler >& xContext( aContext.mxContext );
+ if( xContext.is() ) try
{
- rEntity.maContextStack.push( SaxContextImplPtr( new SaxContextImpl ) );
- rEntity.maNamespaceCount.push(0);
- DefineNamespace( OString("xml"), "http://www.w3.org/XML/1998/namespace");
+ sal_Int32 nElementToken = aContext.mnElementToken;
+ if( nElementToken != FastToken::DONTKNOW )
+ xContext->endFastElement( nElementToken );
+ else
+ xContext->endUnknownElement( aContext.maNamespace.get(), aContext.maElementName.get() );
}
- else
+ catch (const Exception& e)
{
- rEntity.maContextStack.push( SaxContextImplPtr( new SaxContextImpl( rEntity.maContextStack.top() ) ) );
- rEntity.maNamespaceCount.push( rEntity.maNamespaceCount.top() );
+ maSavedException <<= e;
}
+ maContextStack.pop();
+}
+// --------------------------------------------------------------------
+// FastSaxParser implementation
+// --------------------------------------------------------------------
+
+FastSaxParser::FastSaxParser()
+{
+ mxDocumentLocator.set( new FastLocatorImpl( this ) );
+ maUtf8Buffer.realloc( mnUtf8BufferSize );
}
// --------------------------------------------------------------------
-void FastSaxParser::popContext()
+FastSaxParser::~FastSaxParser()
{
- Entity& rEntity = getEntity();
- assert(!rEntity.maContextStack.empty()); // pop without push?
- if( !rEntity.maContextStack.empty() )
- rEntity.maContextStack.pop();
- if( !rEntity.maNamespaceCount.empty() )
- rEntity.maNamespaceCount.pop();
+ if( mxDocumentLocator.is() )
+ mxDocumentLocator->dispose();
}
// --------------------------------------------------------------------
@@ -727,20 +776,16 @@ struct AttributeData
void FastSaxParser::callbackStartElement( const XML_Char* pwName, const XML_Char** awAttributes )
{
- Reference< XFastContextHandler > xParentContext;
Entity& rEntity = getEntity();
- if( !rEntity.maContextStack.empty() )
+ if( rEntity.maNamespaceCount.empty() )
{
- xParentContext = rEntity.maContextStack.top()->mxContext;
- if( !xParentContext.is() )
- {
- // we ignore current elements, so no processing needed
- pushContext();
- return;
- }
+ rEntity.maNamespaceCount.push(0);
+ DefineNamespace( OString("xml"), "http://www.w3.org/XML/1998/namespace");
+ }
+ else
+ {
+ rEntity.maNamespaceCount.push( rEntity.maNamespaceCount.top() );
}
-
- pushContext();
rEntity.mxAttributes->clear();
@@ -749,6 +794,9 @@ void FastSaxParser::callbackStartElement( const XML_Char* pwName, const XML_Char
sal_Int32 nNameLen, nPrefixLen;
const XML_Char *pName;
const XML_Char *pPrefix;
+ OUString aNamespace;
+ if (!rEntity.maNamespaceStack.empty())
+ aNamespace = rEntity.maNamespaceStack.top();
try
{
@@ -783,7 +831,7 @@ void FastSaxParser::callbackStartElement( const XML_Char* pwName, const XML_Char
if( (nNameLen == 5) && (strcmp( pName, "xmlns" ) == 0) )
{
// namespace of the element found
- rEntity.maContextStack.top()->maNamespace = OUString( awAttributes[i+1], strlen( awAttributes[i+1] ), RTL_TEXTENCODING_UTF8 );
+ aNamespace = OUString( awAttributes[i+1], strlen( awAttributes[i+1] ), RTL_TEXTENCODING_UTF8 );
}
else
{
@@ -819,48 +867,18 @@ void FastSaxParser::callbackStartElement( const XML_Char* pwName, const XML_Char
splitName( pwName, pPrefix, nPrefixLen, pName, nNameLen );
if( nPrefixLen > 0 )
nElementToken = GetTokenWithPrefix( pPrefix, nPrefixLen, pName, nNameLen );
- else if( !rEntity.maContextStack.top()->maNamespace.isEmpty() )
- nElementToken = GetTokenWithNamespaceURL( rEntity.maContextStack.top()->maNamespace, pName, nNameLen );
+ else if( !aNamespace.isEmpty() )
+ nElementToken = GetTokenWithNamespaceURL( aNamespace, pName, nNameLen );
else
nElementToken = GetToken( pName );
- rEntity.maContextStack.top()->mnElementToken = nElementToken;
- Reference< XFastAttributeList > xAttr( rEntity.mxAttributes.get() );
- Reference< XFastContextHandler > xContext;
if( nElementToken == FastToken::DONTKNOW )
- {
if( nPrefixLen > 0 )
- rEntity.maContextStack.top()->maNamespace = GetNamespaceURL( pPrefix, nPrefixLen );
-
- const OUString aNamespace( rEntity.maContextStack.top()->maNamespace );
- const OUString aElementName( pName, nNameLen, RTL_TEXTENCODING_UTF8 );
- rEntity.maContextStack.top()->maElementName = aElementName;
-
- if( xParentContext.is() )
- xContext = xParentContext->createUnknownChildContext( aNamespace, aElementName, xAttr );
- else if( rEntity.mxDocumentHandler.is() )
- xContext = rEntity.mxDocumentHandler->createUnknownChildContext( aNamespace, aElementName, xAttr );
+ aNamespace = GetNamespaceURL( pPrefix, nPrefixLen );
- if( xContext.is() )
- {
- rEntity.maContextStack.top()->mxContext = xContext;
- xContext->startUnknownElement( aNamespace, aElementName, xAttr );
- }
- }
- else
- {
- if( xParentContext.is() )
- xContext = xParentContext->createFastChildContext( nElementToken, xAttr );
- else if( rEntity.mxDocumentHandler.is() )
- xContext = rEntity.mxDocumentHandler->createFastChildContext( nElementToken, xAttr );
-
-
- if( xContext.is() )
- {
- rEntity.maContextStack.top()->mxContext = xContext;
- xContext->startFastElement( nElementToken, xAttr );
- }
- }
+ rEntity.maNamespaceStack.push(aNamespace);
+ rEntity.startElement( nElementToken, aNamespace,
+ OUString(pName, nNameLen, RTL_TEXTENCODING_UTF8), rEntity.mxAttributes.get() );
}
catch (const Exception& e)
{
@@ -871,41 +889,21 @@ void FastSaxParser::callbackStartElement( const XML_Char* pwName, const XML_Char
void FastSaxParser::callbackEndElement( SAL_UNUSED_PARAMETER const XML_Char* )
{
Entity& rEntity = getEntity();
- assert(!rEntity.maContextStack.empty()); // no context?
- if( !rEntity.maContextStack.empty() )
- {
- SaxContextImplPtr pContext = rEntity.maContextStack.top();
- const Reference< XFastContextHandler >& xContext( pContext->mxContext );
- if( xContext.is() ) try
- {
- sal_Int32 nElementToken = pContext->mnElementToken;
- if( nElementToken != FastToken::DONTKNOW )
- xContext->endFastElement( nElementToken );
- else
- xContext->endUnknownElement( pContext->maNamespace, pContext->maElementName );
- }
- catch (const Exception& e)
- {
- rEntity.maSavedException <<= e;
- }
+ assert( !rEntity.maNamespaceCount.empty() );
+ if( !rEntity.maNamespaceCount.empty() )
+ rEntity.maNamespaceCount.pop();
- popContext();
- }
+ assert( !rEntity.maNamespaceStack.empty() );
+ if( !rEntity.maNamespaceStack.empty() )
+ rEntity.maNamespaceStack.pop();
+
+ rEntity.endElement();
}
void FastSaxParser::callbackCharacters( const XML_Char* s, int nLen )
{
- Entity& rEntity = getEntity();
- const Reference< XFastContextHandler >& xContext( rEntity.maContextStack.top()->mxContext );
- if( xContext.is() ) try
- {
- xContext->characters( OUString( s, nLen, RTL_TEXTENCODING_UTF8 ) );
- }
- catch (const Exception& e)
- {
- rEntity.maSavedException <<= e;
- }
+ getEntity().characters( OUString( s, nLen, RTL_TEXTENCODING_UTF8 ) );
}
void FastSaxParser::callbackEntityDecl(
diff --git a/sax/source/fastparser/fastparser.hxx b/sax/source/fastparser/fastparser.hxx
index f7a39f2..5d96645 100644
--- a/sax/source/fastparser/fastparser.hxx
+++ b/sax/source/fastparser/fastparser.hxx
@@ -22,12 +22,14 @@
#include <vector>
#include <stack>
-#include <boost/unordered_map.hpp>
+#include <boost/optional.hpp>
#include <boost/shared_ptr.hpp>
+#include <boost/unordered_map.hpp>
#include <rtl/ref.hxx>
+#include <com/sun/star/xml/sax/XFastContextHandler.hpp>
+#include <com/sun/star/xml/sax/XFastDocumentHandler.hpp>
#include <com/sun/star/xml/sax/XFastParser.hpp>
#include <com/sun/star/xml/sax/XFastTokenHandler.hpp>
-#include <com/sun/star/xml/sax/XFastDocumentHandler.hpp>
#include <com/sun/star/lang/XServiceInfo.hpp>
#include <cppuhelper/implbase2.hxx>
@@ -43,9 +45,7 @@ namespace sax_fastparser {
class FastLocatorImpl;
struct NamespaceDefine;
-struct SaxContextImpl;
-typedef ::boost::shared_ptr< SaxContextImpl > SaxContextImplPtr;
typedef ::boost::shared_ptr< NamespaceDefine > NamespaceDefineRef;
typedef ::boost::unordered_map< OUString, sal_Int32,
@@ -53,6 +53,17 @@ typedef ::boost::unordered_map< OUString, sal_Int32,
// --------------------------------------------------------------------
+struct SaxContext
+{
+ ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XFastContextHandler > mxContext;
+ sal_Int32 mnElementToken;
+ boost::optional< OUString > maNamespace;
+ boost::optional< OUString > maElementName;
+ SaxContext( sal_Int32 nElementToken, const OUString& aNamespace, const OUString& aElementName );
+};
+
+// --------------------------------------------------------------------
+
struct ParserData
{
::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XFastDocumentHandler > mxDocumentHandler;
@@ -79,13 +90,21 @@ struct Entity : public ParserData
// therefore the exception must be saved somewhere.
::com::sun::star::uno::Any maSavedException;
- ::std::stack< SaxContextImplPtr > maContextStack;
+ ::std::stack< OUString > maNamespaceStack;
+ /* Context for main thread consuming events.
+ * startElement() stores the data, which characters() and endElement() uses
+ */
+ ::std::stack< SaxContext> maContextStack;
// Determines which elements of maNamespaceDefines are valid in current context
::std::stack< sal_uInt32 > maNamespaceCount;
::std::vector< NamespaceDefineRef > maNamespaceDefines;
explicit Entity( const ParserData& rData );
~Entity();
+ void startElement( sal_Int32 nElementToken, const OUString& aNamespace,
+ const OUString& aElementName, FastAttributeList *pAttributes );
+ void characters( const OUString& sChars );
+ void endElement();
};
// --------------------------------------------------------------------
commit 7ff886034f3e6e7911f2b8074f74d34f32156d01
Author: Matúš Kukan <matus.kukan at gmail.com>
Date: Tue Oct 8 12:42:21 2013 +0200
fastparser: store mnNamespaceCount in another stack
This is preparation work for multithreading.
mnNamespaceCount will be handled in parser thread and the rest in main
thread.
Change-Id: I571026ea499f6876b8dafb4e1bdc56d1add649e5
diff --git a/sax/source/fastparser/fastparser.cxx b/sax/source/fastparser/fastparser.cxx
index cc1134b..623638a 100644
--- a/sax/source/fastparser/fastparser.cxx
+++ b/sax/source/fastparser/fastparser.cxx
@@ -46,13 +46,12 @@ namespace sax_fastparser {
struct SaxContextImpl
{
Reference< XFastContextHandler > mxContext;
- sal_uInt32 mnNamespaceCount;
sal_Int32 mnElementToken;
OUString maNamespace;
OUString maElementName;
- SaxContextImpl() { mnNamespaceCount = 0; mnElementToken = 0; }
- SaxContextImpl( const SaxContextImplPtr& p ) { mnNamespaceCount = p->mnNamespaceCount; mnElementToken = p->mnElementToken; maNamespace = p->maNamespace; }
+ SaxContextImpl() { mnElementToken = 0; }
+ SaxContextImpl( const SaxContextImplPtr& p ) { mnElementToken = p->mnElementToken; maNamespace = p->maNamespace; }
};
// --------------------------------------------------------------------
@@ -220,11 +219,13 @@ void FastSaxParser::pushContext()
if( rEntity.maContextStack.empty() )
{
rEntity.maContextStack.push( SaxContextImplPtr( new SaxContextImpl ) );
+ rEntity.maNamespaceCount.push(0);
DefineNamespace( OString("xml"), "http://www.w3.org/XML/1998/namespace");
}
else
{
rEntity.maContextStack.push( SaxContextImplPtr( new SaxContextImpl( rEntity.maContextStack.top() ) ) );
+ rEntity.maNamespaceCount.push( rEntity.maNamespaceCount.top() );
}
}
@@ -236,6 +237,8 @@ void FastSaxParser::popContext()
assert(!rEntity.maContextStack.empty()); // pop without push?
if( !rEntity.maContextStack.empty() )
rEntity.maContextStack.pop();
+ if( !rEntity.maNamespaceCount.empty() )
+ rEntity.maNamespaceCount.pop();
}
// --------------------------------------------------------------------
@@ -243,10 +246,10 @@ void FastSaxParser::popContext()
void FastSaxParser::DefineNamespace( const OString& rPrefix, const sal_Char* pNamespaceURL )
{
Entity& rEntity = getEntity();
- assert(!rEntity.maContextStack.empty()); // need a context!
- if( !rEntity.maContextStack.empty() )
+ assert(!rEntity.maNamespaceCount.empty()); // need a context!
+ if( !rEntity.maNamespaceCount.empty() )
{
- sal_uInt32 nOffset = rEntity.maContextStack.top()->mnNamespaceCount++;
+ sal_uInt32 nOffset = rEntity.maNamespaceCount.top()++;
if( rEntity.maNamespaceDefines.size() <= nOffset )
rEntity.maNamespaceDefines.resize( rEntity.maNamespaceDefines.size() + 64 );
@@ -299,7 +302,7 @@ sal_Int32 FastSaxParser::GetTokenWithPrefix( const OString& rPrefix, const OStri
sal_Int32 nNamespaceToken = FastToken::DONTKNOW;
Entity& rEntity = getEntity();
- sal_uInt32 nNamespace = rEntity.maContextStack.top()->mnNamespaceCount;
+ sal_uInt32 nNamespace = rEntity.maNamespaceCount.top();
while( nNamespace-- )
{
if( rEntity.maNamespaceDefines[nNamespace]->maPrefix == rPrefix )
@@ -327,7 +330,7 @@ sal_Int32 FastSaxParser::GetTokenWithPrefix( const sal_Char*pPrefix, int nPrefix
sal_Int32 nNamespaceToken = FastToken::DONTKNOW;
Entity& rEntity = getEntity();
- sal_uInt32 nNamespace = rEntity.maContextStack.top()->mnNamespaceCount;
+ sal_uInt32 nNamespace = rEntity.maNamespaceCount.top();
while( nNamespace-- )
{
const OString& rPrefix( rEntity.maNamespaceDefines[nNamespace]->maPrefix );
@@ -368,9 +371,9 @@ sal_Int32 FastSaxParser::GetNamespaceToken( const OUString& rNamespaceURL )
OUString FastSaxParser::GetNamespaceURL( const OString& rPrefix ) throw (SAXException)
{
Entity& rEntity = getEntity();
- if( !rEntity.maContextStack.empty() )
+ if( !rEntity.maNamespaceCount.empty() )
{
- sal_uInt32 nNamespace = rEntity.maContextStack.top()->mnNamespaceCount;
+ sal_uInt32 nNamespace = rEntity.maNamespaceCount.top();
while( nNamespace-- )
if( rEntity.maNamespaceDefines[nNamespace]->maPrefix == rPrefix )
return rEntity.maNamespaceDefines[nNamespace]->maNamespaceURL;
@@ -382,9 +385,9 @@ OUString FastSaxParser::GetNamespaceURL( const OString& rPrefix ) throw (SAXExce
OUString FastSaxParser::GetNamespaceURL( const sal_Char*pPrefix, int nPrefixLen ) throw(SAXException)
{
Entity& rEntity = getEntity();
- if( pPrefix && !rEntity.maContextStack.empty() )
+ if( pPrefix && !rEntity.maNamespaceCount.empty() )
{
- sal_uInt32 nNamespace = rEntity.maContextStack.top()->mnNamespaceCount;
+ sal_uInt32 nNamespace = rEntity.maNamespaceCount.top();
while( nNamespace-- )
{
const OString& rPrefix( rEntity.maNamespaceDefines[nNamespace]->maPrefix );
diff --git a/sax/source/fastparser/fastparser.hxx b/sax/source/fastparser/fastparser.hxx
index e75ee0f..f7a39f2 100644
--- a/sax/source/fastparser/fastparser.hxx
+++ b/sax/source/fastparser/fastparser.hxx
@@ -80,6 +80,8 @@ struct Entity : public ParserData
::com::sun::star::uno::Any maSavedException;
::std::stack< SaxContextImplPtr > maContextStack;
+ // Determines which elements of maNamespaceDefines are valid in current context
+ ::std::stack< sal_uInt32 > maNamespaceCount;
::std::vector< NamespaceDefineRef > maNamespaceDefines;
explicit Entity( const ParserData& rData );
commit 46245ad6cf81932a5fc9e8be582f11d486ed435e
Author: Michael Meeks <michael.meeks at collabora.com>
Date: Fri Oct 11 22:51:44 2013 +0100
oox: special-case single-character a-z token mapping.
Change-Id: I58a810cc6062d5b42558dd5c0f37426a8a210f40
diff --git a/include/oox/token/tokenmap.hxx b/include/oox/token/tokenmap.hxx
index 84dc70d..fb9ebc5 100644
--- a/include/oox/token/tokenmap.hxx
+++ b/include/oox/token/tokenmap.hxx
@@ -58,6 +58,7 @@ private:
typedef ::std::vector< TokenName > TokenNameVector;
TokenNameVector maTokenNames;
+ sal_Int32 mnAlphaTokens[26];
};
// ============================================================================
diff --git a/oox/source/token/tokenmap.cxx b/oox/source/token/tokenmap.cxx
index 5460b7c..ea3e621e 100644
--- a/oox/source/token/tokenmap.cxx
+++ b/oox/source/token/tokenmap.cxx
@@ -79,6 +79,13 @@ TokenMap::TokenMap() :
append( nToken ).append( ", '" ).append( aUtf8Name ).append( '\'' ).getStr() );
}
#endif
+
+ for (unsigned char c = 'a'; c <= 'z'; c++)
+ {
+ struct xmltoken* pToken = Perfect_Hash::in_word_set(
+ reinterpret_cast< const char* >( &c ), 1 );
+ mnAlphaTokens[ c - 'a' ] = pToken ? pToken->nToken : XML_TOKEN_INVALID;
+ }
}
TokenMap::~TokenMap()
@@ -108,6 +115,13 @@ Sequence< sal_Int8 > TokenMap::getUtf8TokenName( sal_Int32 nToken ) const
sal_Int32 TokenMap::getTokenFromUtf8( const Sequence< sal_Int8 >& rUtf8Name ) const
{
+ // 50% of OOXML tokens are primarily 1 lower-case character, a-z
+ if( rUtf8Name.getLength() == 1)
+ {
+ sal_Char c = rUtf8Name[0];
+ if (c >= 'a' && c <= 'z')
+ return mnAlphaTokens[ c - 'a' ];
+ }
struct xmltoken* pToken = Perfect_Hash::in_word_set(
reinterpret_cast< const char* >( rUtf8Name.getConstArray() ), rUtf8Name.getLength() );
return pToken ? pToken->nToken : XML_TOKEN_INVALID;
commit fcde84ac4ab7ef69fa64c159de8752eca440f8d9
Author: Matúš Kukan <matus.kukan at gmail.com>
Date: Wed Oct 16 12:57:45 2013 +0200
sax: add unit test for FastAttributeList
Change-Id: Ie87c80383991dca84b4f6e2074c5c53567ded0b6
diff --git a/sax/CppunitTest_sax_attributes.mk b/sax/CppunitTest_sax_attributes.mk
new file mode 100644
index 0000000..eee7f06
--- /dev/null
+++ b/sax/CppunitTest_sax_attributes.mk
@@ -0,0 +1,27 @@
+# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*-
+#
+# This file is part of the LibreOffice project.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+
+$(eval $(call gb_CppunitTest_CppunitTest,sax_attributes))
+
... etc. - the rest is truncated
More information about the Libreoffice-commits
mailing list