[Libreoffice-commits] core.git: include/sax sax/source writerfilter/source

Noel Grandin noel.grandin at collabora.co.uk
Sun Jul 8 09:48:25 UTC 2018


 include/sax/fastattribs.hxx                |   11 +++++++++
 sax/source/fastparser/fastparser.cxx       |   13 ++++++-----
 sax/source/tools/fastattribs.cxx           |   18 +++++++++++++++-
 writerfilter/source/ooxml/OOXMLFactory.cxx |   32 +++++++++++------------------
 4 files changed, 48 insertions(+), 26 deletions(-)

New commits:
commit a2193f8f33565cc896592acb9d3ab65c756d97fb
Author: Noel Grandin <noel.grandin at collabora.co.uk>
Date:   Thu Jul 5 12:33:51 2018 +0200

    tdf#79878 perf loading docx file, sax improvements
    
    these are the smaller improvements, they make about 5% worth of
    difference
    
    - use std::vector instead of std::deque
    - use std::move on pendingCharacters instead of copying
    - in FastAttributeList::add, when reallocating the buffer, allocate twice
    the existing size, instead of increasing to only what we need
    - in FastAttributeList, create getAttributeIndex and friends, so
    we can avoid iterating the attribute list more often than necessary
    
    Change-Id: I3e3380ea50b77c6845b66e83404e245778ec06eb
    Reviewed-on: https://gerrit.libreoffice.org/57021
    Tested-by: Jenkins
    Reviewed-by: Noel Grandin <noel.grandin at collabora.co.uk>

diff --git a/include/sax/fastattribs.hxx b/include/sax/fastattribs.hxx
index b5fbe225ee28..029f3a12f780 100644
--- a/include/sax/fastattribs.hxx
+++ b/include/sax/fastattribs.hxx
@@ -93,6 +93,9 @@ public:
     bool getAsInteger( sal_Int32 nToken, sal_Int32 &rInt) const;
     bool getAsDouble( sal_Int32 nToken, double &rDouble) const;
     bool getAsChar( sal_Int32 nToken, const char*& rPos ) const;
+    sal_Int32 getAsIntegerByIndex( sal_Int32 nTokenIndex ) const;
+    const char* getAsCharByIndex( sal_Int32 nTokenIndex ) const;
+    OUString getValueByIndex( sal_Int32 nTokenIndex ) const;
 
     // XFastAttributeList
     virtual sal_Bool SAL_CALL hasAttribute( ::sal_Int32 Token ) override;
@@ -103,6 +106,14 @@ public:
     virtual css::uno::Sequence< css::xml::Attribute > SAL_CALL getUnknownAttributes(  ) override;
     virtual css::uno::Sequence< css::xml::FastAttribute > SAL_CALL getFastAttributes() override;
 
+    sal_Int32 getAttributeIndex( ::sal_Int32 Token )
+    {
+        for (size_t i=0; i<maAttributeTokens.size(); ++i)
+            if (maAttributeTokens[i] == Token)
+                return i;
+        return -1;
+    }
+
     static FastAttributeList* castToFastAttributeList(
                         const css::uno::Reference< css::xml::sax::XFastAttributeList >& xAttrList )
     {
diff --git a/sax/source/fastparser/fastparser.cxx b/sax/source/fastparser/fastparser.cxx
index 2765584a08fc..9477559c12e8 100644
--- a/sax/source/fastparser/fastparser.cxx
+++ b/sax/source/fastparser/fastparser.cxx
@@ -178,13 +178,13 @@ struct Entity : public ParserData
     void throwException( const ::rtl::Reference< FastLocatorImpl > &xDocumentLocator,
                          bool mbDuringParse );
 
-    std::stack< NameWithToken >           maNamespaceStack;
+    std::stack< NameWithToken, std::vector<NameWithToken> > maNamespaceStack;
     /* Context for main thread consuming events.
      * startElement() stores the data, which characters() and endElement() uses
      */
-    std::stack< SaxContext>               maContextStack;
+    std::stack< SaxContext, std::vector<SaxContext> >  maContextStack;
     // Determines which elements of maNamespaceDefines are valid in current context
-    std::stack< sal_uInt32 >              maNamespaceCount;
+    std::stack< sal_uInt32, std::vector<sal_uInt32> >  maNamespaceCount;
     std::vector< std::shared_ptr< NamespaceDefine > >
                                           maNamespaceDefines;
 
@@ -270,7 +270,7 @@ private:
     ParserData maData;                      /// Cached parser configuration for next call of parseStream().
 
     Entity *mpTop;                          /// std::stack::top() is amazingly slow => cache this.
-    std::stack< Entity > maEntities;      /// Entity stack for each call of parseStream().
+    std::stack< Entity > maEntities;        /// Entity stack for each call of parseStream().
     OUString pendingCharacters;             /// Data from characters() callback that needs to be sent.
 };
 
@@ -674,11 +674,12 @@ sal_Int32 FastSaxParserImpl::GetTokenWithPrefix( const xmlChar* pPrefix, int nPr
     sal_uInt32 nNamespace = rEntity.maNamespaceCount.top();
     while( nNamespace-- )
     {
-        const OString& rPrefix( rEntity.maNamespaceDefines[nNamespace]->maPrefix );
+        const auto & rNamespaceDefine = rEntity.maNamespaceDefines[nNamespace];
+        const OString& rPrefix( rNamespaceDefine->maPrefix );
         if( (rPrefix.getLength() == nPrefixLen) &&
             (strncmp( rPrefix.getStr(), XML_CAST( pPrefix ), nPrefixLen ) == 0 ) )
         {
-            nNamespaceToken = rEntity.maNamespaceDefines[nNamespace]->mnToken;
+            nNamespaceToken = rNamespaceDefine->mnToken;
             break;
         }
 
diff --git a/sax/source/tools/fastattribs.cxx b/sax/source/tools/fastattribs.cxx
index a9f0baf7a2c0..30a764368947 100644
--- a/sax/source/tools/fastattribs.cxx
+++ b/sax/source/tools/fastattribs.cxx
@@ -86,7 +86,7 @@ void FastAttributeList::add( sal_Int32 nToken, const sal_Char* pValue, size_t nV
     maAttributeValues.push_back( maAttributeValues.back() + nValueLength + 1 );
     if (maAttributeValues.back() > mnChunkLength)
     {
-        mnChunkLength = maAttributeValues.back();
+        mnChunkLength = std::max(mnChunkLength * 2, maAttributeValues.back());
         mpChunk = static_cast<sal_Char *>(realloc( mpChunk, mnChunkLength ));
     }
     strncpy(mpChunk + nWritePosition, pValue, nValueLength);
@@ -166,6 +166,11 @@ bool FastAttributeList::getAsInteger( sal_Int32 nToken, sal_Int32 &rInt) const
     return false;
 }
 
+sal_Int32 FastAttributeList::getAsIntegerByIndex( sal_Int32 nTokenIndex ) const
+{
+    return rtl_str_toInt32( getFastAttributeValue(nTokenIndex), 10 );
+}
+
 bool FastAttributeList::getAsDouble( sal_Int32 nToken, double &rDouble) const
 {
     rDouble = 0.0;
@@ -193,6 +198,12 @@ bool FastAttributeList::getAsChar( sal_Int32 nToken, const char*& rPos ) const
     return false;
 }
 
+const char* FastAttributeList::getAsCharByIndex( sal_Int32 nTokenIndex ) const
+{
+    sal_Int32 nOffset = maAttributeValues[nTokenIndex];
+    return mpChunk + nOffset;
+}
+
 OUString FastAttributeList::getValue( ::sal_Int32 Token )
 {
     for (size_t i = 0; i < maAttributeTokens.size(); ++i)
@@ -202,6 +213,11 @@ OUString FastAttributeList::getValue( ::sal_Int32 Token )
     throw SAXException();
 }
 
+OUString FastAttributeList::getValueByIndex( ::sal_Int32 nTokenIndex ) const
+{
+    return OUString( getFastAttributeValue(nTokenIndex), AttributeValueLength(nTokenIndex), RTL_TEXTENCODING_UTF8 );
+}
+
 OUString FastAttributeList::getOptionalValue( ::sal_Int32 Token )
 {
     for (size_t i = 0; i < maAttributeTokens.size(); ++i)
diff --git a/writerfilter/source/ooxml/OOXMLFactory.cxx b/writerfilter/source/ooxml/OOXMLFactory.cxx
index 91eb7b0e4c96..4c41684cd594 100644
--- a/writerfilter/source/ooxml/OOXMLFactory.cxx
+++ b/writerfilter/source/ooxml/OOXMLFactory.cxx
@@ -36,7 +36,7 @@ OOXMLFactory_ns::~OOXMLFactory_ns()
 // class OOXMLFactory
 
 void OOXMLFactory::attributes(OOXMLFastContextHandler * pHandler,
-                              const uno::Reference< xml::sax::XFastAttributeList > & Attribs)
+                              const uno::Reference< xml::sax::XFastAttributeList > & xAttribs)
 {
     Id nDefine = pHandler->getDefine();
     OOXMLFactory_ns::Pointer_t pFactory = getFactoryForNamespace(nDefine);
@@ -45,7 +45,7 @@ void OOXMLFactory::attributes(OOXMLFastContextHandler * pHandler,
         return;
 
     sax_fastparser::FastAttributeList *pAttribs =
-            sax_fastparser::FastAttributeList::castToFastAttributeList( Attribs );
+            sax_fastparser::FastAttributeList::castToFastAttributeList( xAttribs );
 
     const AttributeInfo *pAttr = pFactory->getAttributeInfoArray(nDefine);
     if (!pAttr)
@@ -54,7 +54,8 @@ void OOXMLFactory::attributes(OOXMLFastContextHandler * pHandler,
     for (; pAttr->m_nToken != -1; ++pAttr)
     {
         sal_Int32 nToken = pAttr->m_nToken;
-        if (!pAttribs->hasAttribute(nToken))
+        sal_Int32 nAttrIndex = pAttribs->getAttributeIndex(nToken);
+        if (nAttrIndex == -1)
             continue;
 
         Id nId = pFactory->getResourceId(nDefine, nToken);
@@ -63,8 +64,7 @@ void OOXMLFactory::attributes(OOXMLFastContextHandler * pHandler,
         {
         case ResourceType::Boolean:
             {
-                const char *pValue = "";
-                pAttribs->getAsChar(nToken, pValue);
+                const char *pValue = pAttribs->getAsCharByIndex(nAttrIndex);
                 OOXMLValue::Pointer_t xValue(OOXMLBooleanValue::Create(pValue));
                 pHandler->newProperty(nId, xValue);
                 pFactory->attributeAction(pHandler, nToken, xValue);
@@ -72,7 +72,7 @@ void OOXMLFactory::attributes(OOXMLFastContextHandler * pHandler,
             break;
         case ResourceType::String:
             {
-                OUString aValue(pAttribs->getValue(nToken));
+                OUString aValue(pAttribs->getValueByIndex(nAttrIndex));
                 OOXMLValue::Pointer_t xValue(new OOXMLStringValue(aValue));
                 pHandler->newProperty(nId, xValue);
                 pFactory->attributeAction(pHandler, nToken, xValue);
@@ -80,8 +80,7 @@ void OOXMLFactory::attributes(OOXMLFastContextHandler * pHandler,
             break;
         case ResourceType::Integer:
             {
-                sal_Int32 nValue;
-                pAttribs->getAsInteger(nToken,nValue);
+                sal_Int32 nValue = pAttribs->getAsIntegerByIndex(nAttrIndex);
                 OOXMLValue::Pointer_t xValue = OOXMLIntegerValue::Create(nValue);
                 pHandler->newProperty(nId, xValue);
                 pFactory->attributeAction(pHandler, nToken, xValue);
@@ -89,8 +88,7 @@ void OOXMLFactory::attributes(OOXMLFastContextHandler * pHandler,
             break;
         case ResourceType::Hex:
             {
-                const char *pValue = "";
-                pAttribs->getAsChar(nToken, pValue);
+                const char *pValue = pAttribs->getAsCharByIndex(nAttrIndex);
                 OOXMLValue::Pointer_t xValue(new OOXMLHexValue(pValue));
                 pHandler->newProperty(nId, xValue);
                 pFactory->attributeAction(pHandler, nToken, xValue);
@@ -98,8 +96,7 @@ void OOXMLFactory::attributes(OOXMLFastContextHandler * pHandler,
             break;
         case ResourceType::HexColor:
             {
-                const char *pValue = "";
-                pAttribs->getAsChar(nToken, pValue);
+                const char *pValue = pAttribs->getAsCharByIndex(nAttrIndex);
                 OOXMLValue::Pointer_t xValue(new OOXMLHexColorValue(pValue));
                 pHandler->newProperty(nId, xValue);
                 pFactory->attributeAction(pHandler, nToken, xValue);
@@ -107,8 +104,7 @@ void OOXMLFactory::attributes(OOXMLFastContextHandler * pHandler,
             break;
         case ResourceType::TwipsMeasure:
             {
-                const char *pValue = "";
-                pAttribs->getAsChar(nToken, pValue);
+                const char *pValue = pAttribs->getAsCharByIndex(nAttrIndex);
                 OOXMLValue::Pointer_t xValue(new OOXMLTwipsMeasureValue(pValue));
                 pHandler->newProperty(nId, xValue);
                 pFactory->attributeAction(pHandler, nToken, xValue);
@@ -116,8 +112,7 @@ void OOXMLFactory::attributes(OOXMLFastContextHandler * pHandler,
             break;
         case ResourceType::HpsMeasure:
             {
-                const char *pValue = "";
-                pAttribs->getAsChar(nToken, pValue);
+                const char *pValue = pAttribs->getAsCharByIndex(nAttrIndex);
                 OOXMLValue::Pointer_t xValue(new OOXMLHpsMeasureValue(pValue));
                 pHandler->newProperty(nId, xValue);
                 pFactory->attributeAction(pHandler, nToken, xValue);
@@ -125,8 +120,7 @@ void OOXMLFactory::attributes(OOXMLFastContextHandler * pHandler,
         break;
         case ResourceType::MeasurementOrPercent:
             {
-                const char *pValue = "";
-                pAttribs->getAsChar(nToken, pValue);
+                const char *pValue = pAttribs->getAsCharByIndex(nAttrIndex);
                 OOXMLValue::Pointer_t xValue(new OOXMLMeasurementOrPercentValue(pValue));
                 pHandler->newProperty(nId, xValue);
                 pFactory->attributeAction(pHandler, nToken, xValue);
@@ -135,7 +129,7 @@ void OOXMLFactory::attributes(OOXMLFastContextHandler * pHandler,
         case ResourceType::List:
             {
                 sal_uInt32 nValue;
-                if (pFactory->getListValue(pAttr->m_nRef, Attribs->getValue(nToken), nValue))
+                if (pFactory->getListValue(pAttr->m_nRef, pAttribs->getValueByIndex(nAttrIndex), nValue))
                 {
                     OOXMLValue::Pointer_t xValue = OOXMLIntegerValue::Create(nValue);
                     pHandler->newProperty(nId, xValue);


More information about the Libreoffice-commits mailing list