[Libreoffice-commits] core.git: include/sax sax/source

Michael Meeks michael.meeks at collabora.com
Tue Nov 19 12:55:41 PST 2013


 include/sax/fastattribs.hxx          |   13 ++++++++
 sax/source/fastparser/fastparser.cxx |   27 -----------------
 sax/source/fastparser/fastparser.hxx |    4 --
 sax/source/tools/fastattribs.cxx     |   55 +++++++++++++++++++++++++++++------
 4 files changed, 62 insertions(+), 37 deletions(-)

New commits:
commit fc25afaa048033940d9e1d22ec8ba31c5b3e9289
Author: Michael Meeks <michael.meeks at collabora.com>
Date:   Tue Nov 19 20:51:35 2013 +0000

    fastparser: accelerate value tokenisation as well.
    
    Change-Id: I99a39e91c684adb1fc92cdb466477cfa90104961

diff --git a/include/sax/fastattribs.hxx b/include/sax/fastattribs.hxx
index f4a9392..c780603 100644
--- a/include/sax/fastattribs.hxx
+++ b/include/sax/fastattribs.hxx
@@ -49,6 +49,18 @@ struct UnknownAttribute
 
 typedef std::vector< UnknownAttribute > UnknownAttributeList;
 
+/// avoid constantly allocating and freeing sequences.
+class SAX_DLLPUBLIC FastTokenLookup
+{
+    static const int mnUtf8BufferSize = 128;
+    ::css::uno::Sequence< sal_Int8 > maUtf8Buffer;
+public:
+    FastTokenLookup();
+    sal_Int32 getTokenFromChars(
+        const ::css::uno::Reference< ::css::xml::sax::XFastTokenHandler > &mxTokenHandler,
+        const char *pStr, size_t nLength = 0 );
+};
+
 class SAX_DLLPUBLIC FastAttributeList : public ::cppu::WeakImplHelper1< ::com::sun::star::xml::sax::XFastAttributeList >
 {
 public:
@@ -83,6 +95,7 @@ private:
     std::vector< sal_Int32 > maAttributeTokens;
     UnknownAttributeList maUnknownAttributes;
     ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XFastTokenHandler > mxTokenHandler;
+    FastTokenLookup maTokenLookup;
 };
 
 }
diff --git a/sax/source/fastparser/fastparser.cxx b/sax/source/fastparser/fastparser.cxx
index 5d64a10..8f68051 100644
--- a/sax/source/fastparser/fastparser.cxx
+++ b/sax/source/fastparser/fastparser.cxx
@@ -348,7 +348,6 @@ Event& Entity::getEvent( CallbackType aType )
 FastSaxParser::FastSaxParser()
 {
     mxDocumentLocator.set( new FastLocatorImpl( this ) );
-    maUtf8Buffer.realloc( mnUtf8BufferSize );
 }
 
 // --------------------------------------------------------------------
@@ -381,31 +380,7 @@ void FastSaxParser::DefineNamespace( const OString& rPrefix, const sal_Char* pNa
 
 sal_Int32 FastSaxParser::GetToken( const sal_Char* pToken, sal_Int32 nLen /* = 0 */ )
 {
-    sal_Int32 nRet;
-
-    if( !nLen )
-        nLen = strlen( pToken );
-
-    if ( nLen < mnUtf8BufferSize )
-    {
-        // Get intimiate with the underlying sequence cf. sal/types.h
-        sal_Sequence *pSeq = maUtf8Buffer.get();
-
-        sal_Int32 nPreRefCount = pSeq->nRefCount;
-
-        pSeq->nElements = nLen;
-        memcpy( pSeq->elements, pToken, nLen );
-        nRet = getEntity().mxTokenHandler->getTokenFromUTF8( maUtf8Buffer );
-
-        (void)nPreRefCount; // for non-debug mode.
-        assert( pSeq->nRefCount == nPreRefCount ); // callee must not take ref.
-    }
-    else
-    {
-        Sequence< sal_Int8 > aSeq( (sal_Int8*)pToken, nLen ); // heap allocate & free
-        nRet = getEntity().mxTokenHandler->getTokenFromUTF8( aSeq );
-    }
-    return nRet;
+    return maTokenLookup.getTokenFromChars( getEntity().mxTokenHandler, pToken, nLen );
 }
 
 // --------------------------------------------------------------------
diff --git a/sax/source/fastparser/fastparser.hxx b/sax/source/fastparser/fastparser.hxx
index 972ce07..35deb0c 100644
--- a/sax/source/fastparser/fastparser.hxx
+++ b/sax/source/fastparser/fastparser.hxx
@@ -222,9 +222,7 @@ private:
 
     ParserData maData;                      /// Cached parser configuration for next call of parseStream().
     ::std::stack< Entity > maEntities;      /// Entity stack for each call of parseStream().
-
-    static const int mnUtf8BufferSize = 128;
-    ::css::uno::Sequence< sal_Int8 > maUtf8Buffer; /// avoid constantly re-allocating this
+    FastTokenLookup maTokenLookup;
 };
 
 }
diff --git a/sax/source/tools/fastattribs.cxx b/sax/source/tools/fastattribs.cxx
index 3ebab23..c0a92a1 100644
--- a/sax/source/tools/fastattribs.cxx
+++ b/sax/source/tools/fastattribs.cxx
@@ -114,10 +114,9 @@ sal_Int32 FastAttributeList::getValueToken( ::sal_Int32 Token ) throw (SAXExcept
 {
     for (size_t i = 0; i < maAttributeTokens.size(); ++i)
         if (maAttributeTokens[i] == Token)
-        {
-            Sequence< sal_Int8 > aSeq( (sal_Int8*) mpChunk + maAttributeValues[i], AttributeValueLength(i) );
-            return mxTokenHandler->getTokenFromUTF8( aSeq );
-        }
+            return maTokenLookup.getTokenFromChars( mxTokenHandler,
+                                                    mpChunk + maAttributeValues[ i ],
+                                                    AttributeValueLength( i ) );
 
     throw SAXException();
 }
@@ -126,10 +125,9 @@ sal_Int32 FastAttributeList::getOptionalValueToken( ::sal_Int32 Token, ::sal_Int
 {
     for (size_t i = 0; i < maAttributeTokens.size(); ++i)
         if (maAttributeTokens[i] == Token)
-        {
-            Sequence< sal_Int8 > aSeq( (sal_Int8*) mpChunk + maAttributeValues[i], AttributeValueLength(i) );
-            return mxTokenHandler->getTokenFromUTF8( aSeq );
-        }
+            return maTokenLookup.getTokenFromChars( mxTokenHandler,
+                                                    mpChunk + maAttributeValues[ i ],
+                                                    AttributeValueLength( i ) );
 
     return Default;
 }
@@ -178,6 +176,47 @@ sal_Int32 FastAttributeList::AttributeValueLength(sal_Int32 i)
     return maAttributeValues[i + 1] - maAttributeValues[i] - 1;
 }
 
+FastTokenLookup::FastTokenLookup()
+{
+    maUtf8Buffer.realloc( mnUtf8BufferSize );
+}
+
+/**
+ * Avoid doing any memory allocation if we can: keep one pre-allocated
+ * sequence around and overwrite its length and contents in place.
+ */
+sal_Int32 FastTokenLookup::getTokenFromChars(
+        const ::css::uno::Reference< ::css::xml::sax::XFastTokenHandler > &xTokenHandler,
+        const char *pToken, size_t nLen /* = 0 */ )
+{
+    sal_Int32 nRet;
+
+    if( !nLen )
+        nLen = strlen( pToken );
+
+    if ( nLen < mnUtf8BufferSize )
+    {
+        // Get intimate with the underlying sequence cf. sal/types.h
+        sal_Sequence *pSeq = maUtf8Buffer.get();
+
+        sal_Int32 nPreRefCount = pSeq->nRefCount;
+
+        pSeq->nElements = nLen;
+        memcpy( pSeq->elements, pToken, nLen );
+        nRet = xTokenHandler->getTokenFromUTF8( maUtf8Buffer );
+
+        (void)nPreRefCount; // for non-debug mode.
+        assert( pSeq->nRefCount == nPreRefCount ); // callee must not take ref
+    }
+    else
+    {
+        Sequence< sal_Int8 > aSeq( (sal_Int8*)pToken, nLen ); // heap allocate & free
+        nRet = xTokenHandler->getTokenFromUTF8( aSeq );
+    }
+
+    return nRet;
+}
+
 }
 
 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */


More information about the Libreoffice-commits mailing list