[Libreoffice-commits] core.git: Branch 'feature/saxparser' - 3 commits - include/oox oox/source sax/source

Michael Meeks michael.meeks at collabora.com
Mon Oct 14 02:28:29 PDT 2013


 include/oox/token/tokenmap.hxx       |    1 +
 oox/source/token/tokenmap.cxx        |   14 ++++++++++++++
 sax/source/fastparser/fastparser.cxx |   32 +++++++++++++++++++-------------
 sax/source/fastparser/fastparser.hxx |   14 +++++++++++---
 4 files changed, 45 insertions(+), 16 deletions(-)

New commits:
commit 75329f3c8668e06f0829c4c29c8ea3138790d547
Author: Michael Meeks <michael.meeks at collabora.com>
Date:   Fri Oct 11 22:51:44 2013 +0100

    oox: special-case single-character a-z token mapping.
    
    Change-Id: I58a810cc6062d5b42558dd5c0f37426a8a210f40

diff --git a/include/oox/token/tokenmap.hxx b/include/oox/token/tokenmap.hxx
index 84dc70d..fb9ebc5 100644
--- a/include/oox/token/tokenmap.hxx
+++ b/include/oox/token/tokenmap.hxx
@@ -58,6 +58,7 @@ private:
     typedef ::std::vector< TokenName > TokenNameVector;
 
     TokenNameVector     maTokenNames;
+    sal_Int32           mnAlphaTokens[26];
 };
 
 // ============================================================================
diff --git a/oox/source/token/tokenmap.cxx b/oox/source/token/tokenmap.cxx
index 5460b7c..ea3e621e 100644
--- a/oox/source/token/tokenmap.cxx
+++ b/oox/source/token/tokenmap.cxx
@@ -79,6 +79,13 @@ TokenMap::TokenMap() :
             append( nToken ).append( ", '" ).append( aUtf8Name ).append( '\'' ).getStr() );
     }
 #endif
+
+    for (unsigned char c = 'a'; c <= 'z'; c++)
+    {
+        struct xmltoken* pToken = Perfect_Hash::in_word_set(
+                reinterpret_cast< const char* >( &c ), 1 );
+        mnAlphaTokens[ c - 'a' ] = pToken ? pToken->nToken : XML_TOKEN_INVALID;
+    }
 }
 
 TokenMap::~TokenMap()
@@ -108,6 +115,13 @@ Sequence< sal_Int8 > TokenMap::getUtf8TokenName( sal_Int32 nToken ) const
 
 sal_Int32 TokenMap::getTokenFromUtf8( const Sequence< sal_Int8 >& rUtf8Name ) const
 {
+    // 50% of OOXML tokens are primarily 1 lower-case character, a-z
+    if( rUtf8Name.getLength() == 1)
+    {
+        sal_Char c = rUtf8Name[0];
+        if (c >= 'a' && c <= 'z')
+            return mnAlphaTokens[ c - 'a' ];
+    }
     struct xmltoken* pToken = Perfect_Hash::in_word_set(
         reinterpret_cast< const char* >( rUtf8Name.getConstArray() ), rUtf8Name.getLength() );
     return pToken ? pToken->nToken : XML_TOKEN_INVALID;
commit 44b99d6e145df911b026213c581624bdcc2a70f5
Author: Michael Meeks <michael.meeks at collabora.com>
Date:   Fri Oct 11 21:46:45 2013 +0100

    fastparser: cache default namespace token for ooxml.
    
    Change-Id: Iee98ec92380d6d0404ab236e062ddbc2378cda43

diff --git a/sax/source/fastparser/fastparser.cxx b/sax/source/fastparser/fastparser.cxx
index a114c4e..de16108 100644
--- a/sax/source/fastparser/fastparser.cxx
+++ b/sax/source/fastparser/fastparser.cxx
@@ -486,10 +486,8 @@ OUString FastSaxParser::GetNamespaceURL( const sal_Char*pPrefix, int nPrefixLen
 
 // --------------------------------------------------------------------
 
-sal_Int32 FastSaxParser::GetTokenWithNamespaceURL( const OUString& rNamespaceURL, const sal_Char* pName, int nNameLen )
+sal_Int32 FastSaxParser::GetTokenWithContextNamespace( sal_Int32 nNamespaceToken, const sal_Char* pName, int nNameLen )
 {
-    sal_Int32 nNamespaceToken = GetNamespaceToken( rNamespaceURL );
-
     if( nNamespaceToken != FastToken::DONTKNOW )
     {
         sal_Int32 nNameToken = GetToken( pName, nNameLen );
@@ -958,10 +956,14 @@ void FastSaxParser::callbackStartElement( const XML_Char* pwName, const XML_Char
     sal_Int32 nNameLen, nPrefixLen;
     const XML_Char *pName;
     const XML_Char *pPrefix;
-    OUString aNamespace;
+    OUString sNamespace;
+    sal_Int32 nNamespaceToken = FastToken::DONTKNOW;
     FastAttributeList *pAttributes = new FastAttributeList( rEntity.mxTokenHandler );
     if (!rEntity.maNamespaceStack.empty())
-        aNamespace = rEntity.maNamespaceStack.top();
+    {
+        sNamespace = rEntity.maNamespaceStack.top().msName;
+        nNamespaceToken = rEntity.maNamespaceStack.top().mnToken;
+    }
 
     try
     {
@@ -987,8 +989,9 @@ void FastSaxParser::callbackStartElement( const XML_Char* pwName, const XML_Char
             {
                 if( (nNameLen == 5) && (strcmp( pName, "xmlns" ) == 0) )
                 {
-                    // namespace of the element found
-                    aNamespace = OUString( awAttributes[i+1], strlen( awAttributes[i+1] ), RTL_TEXTENCODING_UTF8 );
+                    // default namespace is the attribute value
+                    sNamespace = OUString( awAttributes[i+1], strlen( awAttributes[i+1] ), RTL_TEXTENCODING_UTF8 );
+                    nNamespaceToken = GetNamespaceToken( sNamespace );
                 }
             }
         }
@@ -1026,17 +1029,20 @@ void FastSaxParser::callbackStartElement( const XML_Char* pwName, const XML_Char
         splitName( pwName, pPrefix, nPrefixLen, pName, nNameLen );
         if( nPrefixLen > 0 )
             nElementToken = GetTokenWithPrefix( pPrefix, nPrefixLen, pName, nNameLen );
-        else if( !aNamespace.isEmpty() )
-            nElementToken = GetTokenWithNamespaceURL( aNamespace, pName, nNameLen );
+        else if( !sNamespace.isEmpty() )
+            nElementToken = GetTokenWithContextNamespace( nNamespaceToken, pName, nNameLen );
         else
             nElementToken = GetToken( pName );
 
         if( nElementToken == FastToken::DONTKNOW )
             if( nPrefixLen > 0 )
-                aNamespace = GetNamespaceURL( pPrefix, nPrefixLen );
+            {
+                sNamespace = GetNamespaceURL( pPrefix, nPrefixLen );
+                nNamespaceToken = GetNamespaceToken( sNamespace );
+            }
 
-        rEntity.maNamespaceStack.push(aNamespace);
-        produce(new Event( CallbackType::START_ELEMENT, nElementToken, aNamespace,
+        rEntity.maNamespaceStack.push( NameWithToken(sNamespace, nNamespaceToken) );
+        produce(new Event( CallbackType::START_ELEMENT, nElementToken, sNamespace,
                     OUString(pName, nNameLen, RTL_TEXTENCODING_UTF8), pAttributes ));
     }
     catch (const Exception& e)
diff --git a/sax/source/fastparser/fastparser.hxx b/sax/source/fastparser/fastparser.hxx
index ae328a9..1fb8c7a 100644
--- a/sax/source/fastparser/fastparser.hxx
+++ b/sax/source/fastparser/fastparser.hxx
@@ -57,6 +57,14 @@ typedef std::vector<Event *> EventList;
 
 enum CallbackType { START_ELEMENT, END_ELEMENT, CHARACTERS, DONE, EXCEPTION };
 
+struct NameWithToken
+{
+    OUString msName;
+    sal_Int32 mnToken;
+    NameWithToken(const OUString& sName, const sal_Int32& nToken):
+        msName(sName), mnToken(nToken) {}
+};
+
 struct Event {
     struct argCharacters {
         OUString msChars;
@@ -121,7 +129,7 @@ struct Entity : public ParserData
     // therefore the exception must be saved somewhere.
     ::com::sun::star::uno::Any              maSavedException;
 
-    ::std::stack< OUString >                maNamespaceStack;
+    ::std::stack< NameWithToken >           maNamespaceStack;
     ::std::stack< SaxContextImpl* >         maContextStack;
     // Determines which elements of maNamespaceDefines are valid in current context
     ::std::stack< sal_uInt32 >              maNamespaceCount;
@@ -167,7 +175,7 @@ public:
     void callbackStartElement( const XML_Char* name, const XML_Char** atts );
     void callbackEndElement( const XML_Char* name );
     void callbackCharacters( const XML_Char* s, int nLen );
-    int callbackExternalEntityRef( XML_Parser parser, const XML_Char *openEntityNames, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId);
+    int  callbackExternalEntityRef( XML_Parser parser, const XML_Char *openEntityNames, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId);
     void callbackEntityDecl(const XML_Char *entityName, int is_parameter_entity,
             const XML_Char *value, int value_length, const XML_Char *base,
             const XML_Char *systemId, const XML_Char *publicId,
@@ -188,7 +196,7 @@ private:
     OUString GetNamespaceURL( const OString& rPrefix ) throw (::com::sun::star::xml::sax::SAXException);
     OUString GetNamespaceURL( const sal_Char*pPrefix, int nPrefixLen ) throw (::com::sun::star::xml::sax::SAXException);
     sal_Int32 GetNamespaceToken( const OUString& rNamespaceURL );
-    sal_Int32 GetTokenWithNamespaceURL( const OUString& rNamespaceURL, const sal_Char* pName, int nNameLen );
+    sal_Int32 GetTokenWithContextNamespace( sal_Int32 nNamespaceToken, const sal_Char* pName, int nNameLen );
     void DefineNamespace( const OString& rPrefix, const sal_Char* pNamespaceURL );
 
     void pushContext();
commit f8315087aa5cf994c821b0e4176a68c0922bb987
Author: Matúš Kukan <matus.kukan at gmail.com>
Date:   Sun Oct 13 14:56:46 2013 +0200

    fix previous commit
    
    Change-Id: I4182391e7967df77d76207b02288dba7e37fd270

diff --git a/sax/source/fastparser/fastparser.cxx b/sax/source/fastparser/fastparser.cxx
index 731097e..a114c4e 100644
--- a/sax/source/fastparser/fastparser.cxx
+++ b/sax/source/fastparser/fastparser.cxx
@@ -930,7 +930,7 @@ void FastSaxParser::parse()
     }
     while( nRead > 0 );
     produce(new Event( CallbackType::DONE ));
-#ifndef FREE_IN_MAIN_THREAD
+#if !FREE_IN_MAIN_THREAD
     deleteUsedEvents();
 #endif
 }


More information about the Libreoffice-commits mailing list