[Libreoffice-commits] core.git: sax/qa sax/source

Mohammed Abdul Azeem azeemmysore at gmail.com
Wed Aug 10 11:10:26 UTC 2016


 sax/qa/cppunit/xmlimport.cxx               |   89 +++++++++++++++++++++++++++
 sax/source/fastparser/fastparser.cxx       |   77 ++++++++++-------------
 sax/source/fastparser/legacyfastparser.cxx |   94 +++++++++++++++++++++++++----
 3 files changed, 206 insertions(+), 54 deletions(-)

New commits:
commit 18edd88edc0c45d9c3b8f6faa45bab481ec078f5
Author: Mohammed Abdul Azeem <azeemmysore at gmail.com>
Date:   Wed Aug 3 17:27:48 2016 +0530

    GSoC - Make legacyfastparser use tokens:
    
    This tokenizes some elements, de-tokenizes them while consuming,
    and emits the elements through the legacy interface. DummyTokenHandler
    is only there to test correctness.
    
    Change-Id: I1ea1e4d806ed4d426215f93b3f6b66a9776f6479
    Reviewed-on: https://gerrit.libreoffice.org/27849
    Tested-by: Jenkins <ci at libreoffice.org>
    Reviewed-by: Noel Grandin <noelgrandin at gmail.com>

diff --git a/sax/qa/cppunit/xmlimport.cxx b/sax/qa/cppunit/xmlimport.cxx
index f7fcd73..c50a674 100644
--- a/sax/qa/cppunit/xmlimport.cxx
+++ b/sax/qa/cppunit/xmlimport.cxx
@@ -24,6 +24,7 @@
 #include <test/bootstrapfixture.hxx>
 #include <cppuhelper/weak.hxx>
 #include <cppuhelper/implbase.hxx>
+#include <com/sun/star/beans/Pair.hpp>
 #include <com/sun/star/xml/sax/XDocumentHandler.hpp>
 #include <com/sun/star/xml/sax/XFastDocumentHandler.hpp>
 #include <com/sun/star/xml/sax/XFastAttributeList.hpp>
@@ -40,6 +41,7 @@
 #include <osl/conditn.hxx>
 #include <unotools/ucbstreamhelper.hxx>
 #include <unotools/streamwrap.hxx>
+#include <sax/fastattribs.hxx>
 #include <string>
 #include <stack>
 #include <deque>
@@ -264,6 +266,73 @@ void SAL_CALL NSDocumentHandler::startElement( const OUString& aName, const Refe
         CPPUNIT_ASSERT(false);
 }
 
+class DummyTokenHandler : public cppu::WeakImplHelper< XFastTokenHandler >,
+                          public sax_fastparser::FastTokenHandlerBase
+{
+public:
+    const static OUString tokens[];
+    const static OUString namespaceURIs[];
+    const static OUString namespacePrefixes[];
+
+    // XFastTokenHandler
+    virtual Sequence< sal_Int8 > SAL_CALL getUTF8Identifier( sal_Int32 nToken )
+        throw (css::uno::RuntimeException, std::exception) override;
+    virtual sal_Int32 SAL_CALL getTokenFromUTF8( const css::uno::Sequence< sal_Int8 >& Identifier )
+        throw (css::uno::RuntimeException, std::exception) override;
+    //FastTokenHandlerBase
+    virtual sal_Int32 getTokenDirect( const char *pToken, sal_Int32 nLength ) const override;
+};
+
+const OUString DummyTokenHandler::tokens[] = { "Signature", "CanonicalizationMethod", "Algorithm", "Type",
+                                              "DigestMethod", "Reference", "document",
+                                              "spacing", "Player", "Height" };
+
+const OUString DummyTokenHandler::namespaceURIs[] = { "http://www.w3.org/2000/09/xmldsig#",
+                                                  "http://schemas.openxmlformats.org/wordprocessingml/2006/main/",
+                                                  "xyzsports.com/players/football/" };
+
+const OUString DummyTokenHandler::namespacePrefixes[] = { "", "w", "Player" };
+
+Sequence< sal_Int8 > DummyTokenHandler::getUTF8Identifier( sal_Int32 nToken )
+    throw (uno::RuntimeException, std::exception)
+{
+    OString aUtf8Token;
+    if ( ( ( nToken & 0xffff0000 ) != 0 ) ) //namespace
+    {
+        sal_uInt32 nNamespaceToken = ( nToken >> 16 ) - 1;
+        if ( nNamespaceToken < sizeof( namespacePrefixes ) / sizeof( OUString ) )
+            aUtf8Token = OUStringToOString( namespacePrefixes[ nNamespaceToken ], RTL_TEXTENCODING_UTF8 );
+    }
+    else //element or attribute
+    {
+        sal_uInt32 nElementToken = nToken & 0xffff;
+        if ( nElementToken < sizeof( tokens ) / sizeof( OUString ) )
+            aUtf8Token = OUStringToOString( tokens[ nElementToken ], RTL_TEXTENCODING_UTF8 );
+    }
+    Sequence< sal_Int8 > aSeq = Sequence< sal_Int8 >( reinterpret_cast< const sal_Int8* >(
+                aUtf8Token.getStr() ), aUtf8Token.getLength() );
+    return aSeq;
+}
+
+sal_Int32 DummyTokenHandler::getTokenFromUTF8( const uno::Sequence< sal_Int8 >& rIdentifier )
+    throw (uno::RuntimeException, std::exception)
+{
+    return getTokenDirect( reinterpret_cast< const char* >(
+                    rIdentifier.getConstArray() ), rIdentifier.getLength() );
+}
+
+sal_Int32 DummyTokenHandler::getTokenDirect( const char* pToken, sal_Int32 nLength ) const
+{
+    OUString sToken( pToken, nLength, RTL_TEXTENCODING_UTF8 );
+    for( sal_uInt16  i = 0; i < sizeof(tokens)/sizeof(OUString); i++ )
+    {
+        if ( tokens[i] == sToken )
+            return (sal_Int32)i;
+    }
+    return FastToken::DONTKNOW;
+}
+
+
 class XMLImportTest : public test::BootstrapFixture
 {
 private:
@@ -298,6 +367,26 @@ void XMLImportTest::setUp()
     m_xLegacyFastParser.set( xContext->getServiceManager()->createInstanceWithContext
                     ( "com.sun.star.xml.sax.LegacyFastParser", xContext ), UNO_QUERY );
     m_xLegacyFastParser->setDocumentHandler( m_xDocumentHandler.get() );
+
+    Reference< XFastTokenHandler > xTokenHandler;
+    xTokenHandler.set( new DummyTokenHandler() );
+    uno::Reference<lang::XInitialization> const xInit(m_xLegacyFastParser,
+                            uno::UNO_QUERY_THROW);
+    uno::Sequence<uno::Any> args(1);
+    args[0] <<= xTokenHandler;
+    xInit->initialize( args );
+
+    sal_Int32 nNamespaceCount = sizeof( DummyTokenHandler::namespaceURIs ) / sizeof( OUString );
+    uno::Sequence<uno::Any> namespaceArgs( nNamespaceCount + 1 );
+    namespaceArgs[0] <<= OUString( "registerNamespaces" );
+    for (sal_Int32 i = 1; i <= nNamespaceCount; i++ )
+    {
+        css::beans::Pair <OUString, sal_Int32> rPair;
+        rPair = css::beans::Pair<OUString, sal_Int32>( DummyTokenHandler::namespaceURIs[i - 1], i << 16 );
+        namespaceArgs[i] <<= rPair;
+    }
+    xInit->initialize( namespaceArgs );
+
     m_sDirPath = m_directories.getPathFromSrc( "/sax/qa/data/" );
 }
 
diff --git a/sax/source/fastparser/fastparser.cxx b/sax/source/fastparser/fastparser.cxx
index 8fa9223..24811fd 100644
--- a/sax/source/fastparser/fastparser.cxx
+++ b/sax/source/fastparser/fastparser.cxx
@@ -434,6 +434,17 @@ void Entity::startElement( Event *pEvent )
     {
         Reference< XFastAttributeList > xAttr( pEvent->mxAttributes.get() );
         Reference< XFastContextHandler > xContext;
+
+        if ( mxNamespaceHandler.is() )
+        {
+            Sequence< xml::Attribute > NSDeclAttribs = pEvent->mxDeclAttributes->getUnknownAttributes();
+            sal_uInt16 len = NSDeclAttribs.getLength();
+            for (sal_uInt16 i = 0; i < len; i++)
+            {
+                mxNamespaceHandler->registerNamespace( NSDeclAttribs[i].Name, NSDeclAttribs[i].Value );
+            }
+        }
+
         if( nElementToken == FastToken::DONTKNOW )
         {
             if( pParentContext )
@@ -441,16 +452,6 @@ void Entity::startElement( Event *pEvent )
             else if( mxDocumentHandler.is() )
                 xContext = mxDocumentHandler->createUnknownChildContext( aNamespace, aElementName, xAttr );
 
-            if ( mxNamespaceHandler.is() )
-            {
-                Sequence< xml::Attribute > NSDeclAttribs = pEvent->mxDeclAttributes->getUnknownAttributes();
-                sal_uInt16 len = NSDeclAttribs.getLength();
-                for (sal_uInt16 i = 0; i < len; i++)
-                {
-                    mxNamespaceHandler->registerNamespace( NSDeclAttribs[i].Name, NSDeclAttribs[i].Value );
-                }
-            }
-
             if( xContext.is() )
             {
                 xContext->startUnknownElement( aNamespace, aElementName, xAttr );
@@ -1103,29 +1104,33 @@ void FastSaxParserImpl::callbackStartElement(const xmlChar *localName , const xm
 
     try
     {
-        if ( rEntity.mxTokenHandler.is() )
-        {
-            /*  #158414# Each element may define new namespaces, also for attribues.
-                First, process all namespaces, second, process the attributes after namespaces
-                have been initialized. */
+        /*  #158414# Each element may define new namespaces, also for attributes.
+            First, process all namespaces, second, process the attributes after namespaces
+            have been initialized. */
 
-            // #158414# first: get namespaces
-            for (int i = 0; i < numNamespaces * 2; i += 2)
+        // #158414# first: get namespaces
+        for (int i = 0; i < numNamespaces * 2; i += 2)
+        {
+            // namespaces[] is (prefix/URI)
+            if( namespaces[ i ] != nullptr )
             {
-                // namespaces[] is (prefix/URI)
-                if( namespaces[ i ] != nullptr )
-                {
-                        DefineNamespace( OString( XML_CAST( namespaces[ i ] )),
-                            OUString( XML_CAST( namespaces[ i + 1 ] ), strlen( XML_CAST( namespaces[ i + 1 ] )), RTL_TEXTENCODING_UTF8 ));
-                }
-                else
-                {
-                    // default namespace
-                    sNamespace = OUString( XML_CAST( namespaces[ i + 1 ] ), strlen( XML_CAST( namespaces[ i + 1 ] )), RTL_TEXTENCODING_UTF8 );
-                    nNamespaceToken = GetNamespaceToken( sNamespace );
-                }
+                DefineNamespace( OString( XML_CAST( namespaces[ i ] )),
+                    OUString( XML_CAST( namespaces[ i + 1 ] ), strlen( XML_CAST( namespaces[ i + 1 ] )), RTL_TEXTENCODING_UTF8 ));
+                if( rEntity.mxNamespaceHandler.is() )
+                    rEvent.mxDeclAttributes->addUnknown( OString( XML_CAST( namespaces[ i ] ) ), OString( XML_CAST( namespaces[ i + 1 ] ) ) );
             }
+            else
+            {
+                // default namespace
+                sNamespace = OUString( XML_CAST( namespaces[ i + 1 ] ), strlen( XML_CAST( namespaces[ i + 1 ] )), RTL_TEXTENCODING_UTF8 );
+                nNamespaceToken = GetNamespaceToken( sNamespace );
+                if( rEntity.mxNamespaceHandler.is() )
+                    rEvent.mxDeclAttributes->addUnknown( OString( "" ), OString( XML_CAST( namespaces[ i + 1 ] ) ) );
+            }
+        }
 
+        if ( rEntity.mxTokenHandler.is() )
+        {
             // #158414# second: fill attribute list with other attributes
             for (int i = 0; i < numAttributes * 5; i += 5)
             {
@@ -1159,20 +1164,6 @@ void FastSaxParserImpl::callbackStartElement(const xmlChar *localName , const xm
         }
         else
         {
-            for (int i = 0; i < numNamespaces * 2; i += 2)
-            {
-                if( rEntity.mxNamespaceHandler.is() )
-                {
-                    if( namespaces[ i ] != nullptr )
-                        rEvent.mxDeclAttributes->addUnknown( OString( XML_CAST( namespaces[ i ] ) ), OString( XML_CAST( namespaces[ i + 1 ] ) ) );
-                    else
-                    {
-                        sNamespace = OUString( XML_CAST( namespaces[ i + 1 ] ), strlen( XML_CAST( namespaces[ i + 1 ] )), RTL_TEXTENCODING_UTF8 );
-                        rEvent.mxDeclAttributes->addUnknown( OString( "" ), OString( XML_CAST( namespaces[ i + 1 ] ) ) );
-                    }
-                }
-            }
-
             for (int i = 0; i < numAttributes * 5; i += 5)
             {
                 if( attributes[ i + 1 ] != nullptr )
diff --git a/sax/source/fastparser/legacyfastparser.cxx b/sax/source/fastparser/legacyfastparser.cxx
index 166f740..dbcc03f 100644
--- a/sax/source/fastparser/legacyfastparser.cxx
+++ b/sax/source/fastparser/legacyfastparser.cxx
@@ -22,6 +22,7 @@
 #include <com/sun/star/xml/sax/FastParser.hpp>
 #include <com/sun/star/xml/sax/FastToken.hpp>
 #include <com/sun/star/lang/XInitialization.hpp>
+#include <com/sun/star/beans/Pair.hpp>
 #include <comphelper/attributelist.hxx>
 #include <cppuhelper/supportsservice.hxx>
 #include <comphelper/processfactory.hxx>
@@ -129,16 +130,24 @@ public:
 private:
     Reference< XFastParser > m_xParser;
     Reference< XDocumentHandler > m_xDocumentHandler;
+    Reference< XFastTokenHandler > m_xTokenHandler;
 
 };
 
+
 class CallbackDocumentHandler : public WeakImplHelper< XFastDocumentHandler >
 {
 private:
     Reference< XDocumentHandler > m_xDocumentHandler;
+    Reference< XFastTokenHandler > m_xTokenHandler;
     rtl::Reference< NamespaceHandler > m_aNamespaceHandler;
+    const OUString getNamespacePrefixFromToken( sal_Int32 nToken );
+    const OUString getNameFromToken( sal_Int32 nToken );
+
 public:
-    CallbackDocumentHandler( Reference< XDocumentHandler > const & xDocumentHandler, rtl::Reference< NamespaceHandler > const & rNamespaceHandler );
+    CallbackDocumentHandler( Reference< XDocumentHandler > const & xDocumentHandler,
+                             rtl::Reference< NamespaceHandler > const & rNamespaceHandler,
+                             Reference< XFastTokenHandler > const & xTokenHandler);
 
     // XFastDocumentHandler
     virtual void SAL_CALL startDocument() throw (SAXException, RuntimeException, exception) override;
@@ -156,10 +165,32 @@ public:
 
 };
 
-CallbackDocumentHandler::CallbackDocumentHandler( Reference< XDocumentHandler > const & xDocumentHandler, rtl::Reference< NamespaceHandler > const & rNamespaceHandler )
+const OUString CallbackDocumentHandler::getNamespacePrefixFromToken( sal_Int32 nToken )
+{
+    if ( ( nToken & 0xffff0000 ) != 0 )
+    {
+        Sequence< sal_Int8 > aSeq = m_xTokenHandler->getUTF8Identifier( nToken & 0xffff0000 );
+        return OUString( reinterpret_cast< const char* >(
+                        aSeq.getConstArray() ), aSeq.getLength(), RTL_TEXTENCODING_UTF8 );
+    }
+    else
+        return OUString();
+}
+
+const OUString CallbackDocumentHandler::getNameFromToken( sal_Int32 nToken )
+{
+    Sequence< sal_Int8 > aSeq = m_xTokenHandler->getUTF8Identifier( nToken & 0xffff );
+    return OUString( reinterpret_cast< const char* >(
+                    aSeq.getConstArray() ), aSeq.getLength(), RTL_TEXTENCODING_UTF8 );
+}
+
+CallbackDocumentHandler::CallbackDocumentHandler( Reference< XDocumentHandler > const & xDocumentHandler,
+                                                  rtl::Reference< NamespaceHandler > const & rNamespaceHandler,
+                                                  Reference< XFastTokenHandler > const & xTokenHandler)
 {
    m_xDocumentHandler.set( xDocumentHandler );
    m_aNamespaceHandler.set( rNamespaceHandler.get() );
+   m_xTokenHandler.set( xTokenHandler );
 }
 
 void SAL_CALL CallbackDocumentHandler::startDocument()
@@ -183,9 +214,11 @@ void SAL_CALL CallbackDocumentHandler::setDocumentLocator( const Reference< XLoc
         m_xDocumentHandler->setDocumentLocator( xLocator );
 }
 
-void SAL_CALL CallbackDocumentHandler::startFastElement( sal_Int32/* nElement */, const Reference< XFastAttributeList >&/* Attribs */ )
+void SAL_CALL CallbackDocumentHandler::startFastElement( sal_Int32 nElement , const Reference< XFastAttributeList >& Attribs  )
         throw (SAXException, RuntimeException, exception)
 {
+    startUnknownElement( CallbackDocumentHandler::getNamespacePrefixFromToken( nElement ),
+                         CallbackDocumentHandler::getNameFromToken( nElement ), Attribs );
 }
 
 void SAL_CALL CallbackDocumentHandler::startUnknownElement( const OUString& Namespace, const OUString& Name, const Reference< XFastAttributeList >& Attribs  )
@@ -201,15 +234,29 @@ void SAL_CALL CallbackDocumentHandler::startUnknownElement( const OUString& Name
         else
             elementName = Name;
 
+        Sequence< xml::FastAttribute > fastAttribs = Attribs->getFastAttributes();
+        sal_uInt16 len = fastAttribs.getLength();
+        for (sal_uInt16 i = 0; i < len; i++)
+        {
+            OUString& rAttrValue = fastAttribs[i].Value;
+            sal_Int32 nToken = fastAttribs[i].Token;
+            const OUString& rAttrNamespacePrefix = CallbackDocumentHandler::getNamespacePrefixFromToken( nToken );
+            OUString sAttrName = CallbackDocumentHandler::getNameFromToken( nToken );
+            if ( !rAttrNamespacePrefix.isEmpty() )
+                sAttrName = rAttrNamespacePrefix + ":" + sAttrName;
+
+            rAttrList->AddAttribute( sAttrName, "CDATA", rAttrValue );
+        }
+
         Sequence< xml::Attribute > unknownAttribs = Attribs->getUnknownAttributes();
-        sal_uInt16 len = unknownAttribs.getLength();
+        len = unknownAttribs.getLength();
         for (sal_uInt16 i = 0; i < len; i++)
         {
             OUString& rAttrValue = unknownAttribs[i].Value;
             OUString sAttrName = unknownAttribs[i].Name;
-            OUString& rAttrNamespaceURL = unknownAttribs[i].NamespaceURL;
-            if ( !rAttrNamespaceURL.isEmpty() )
-                sAttrName = rAttrNamespaceURL + ":" + sAttrName;
+            OUString& rAttrNamespacePrefix = unknownAttribs[i].NamespaceURL;
+            if ( !rAttrNamespacePrefix.isEmpty() )
+                sAttrName = rAttrNamespacePrefix + ":" + sAttrName;
 
             rAttrList->AddAttribute( sAttrName, "CDATA", rAttrValue );
         }
@@ -217,9 +264,11 @@ void SAL_CALL CallbackDocumentHandler::startUnknownElement( const OUString& Name
     }
 }
 
-void SAL_CALL CallbackDocumentHandler::endFastElement( sal_Int32/* nElement */)
+void SAL_CALL CallbackDocumentHandler::endFastElement( sal_Int32 nElement )
         throw (SAXException, RuntimeException, exception)
 {
+    endUnknownElement( CallbackDocumentHandler::getNamespacePrefixFromToken( nElement ),
+                       CallbackDocumentHandler::getNameFromToken( nElement ) );
 }
 
 
@@ -267,9 +316,30 @@ SaxLegacyFastParser::SaxLegacyFastParser( ) : m_aNamespaceHandler( new Namespace
 void SAL_CALL SaxLegacyFastParser::initialize(Sequence< Any > const& rArguments )
     throw (RuntimeException, Exception, exception)
 {
-    uno::Reference<lang::XInitialization> const xInit(m_xParser,
+    if (rArguments.getLength())
+    {
+        Reference< XFastTokenHandler > xTokenHandler;
+        OUString str;
+        if ( ( rArguments[0] >>= xTokenHandler ) && xTokenHandler.is() )
+        {
+            m_xTokenHandler.set( xTokenHandler );
+        }
+        else if ( ( rArguments[0] >>= str ) && "registerNamespaces" == str )
+        {
+            css::beans::Pair< OUString, sal_Int32 > rPair;
+            for (sal_Int32 i = 1; i < rArguments.getLength(); i++ )
+            {
+                rArguments[i] >>= rPair;
+                m_xParser->registerNamespace( rPair.First, rPair.Second );
+            }
+        }
+        else
+        {
+            uno::Reference<lang::XInitialization> const xInit(m_xParser,
                             uno::UNO_QUERY_THROW);
-    xInit->initialize( rArguments );
+            xInit->initialize( rArguments );
+        }
+    }
 }
 
 void SaxLegacyFastParser::parseStream( const InputSource& structSource )
@@ -277,7 +347,9 @@ void SaxLegacyFastParser::parseStream( const InputSource& structSource )
                 IOException,
                 RuntimeException, exception)
 {
-    m_xParser->setFastDocumentHandler( new CallbackDocumentHandler( m_xDocumentHandler.get(), m_aNamespaceHandler.get() ) );
+    m_xParser->setFastDocumentHandler( new CallbackDocumentHandler( m_xDocumentHandler.get(),
+                                       m_aNamespaceHandler.get(), m_xTokenHandler.get() ) );
+    m_xParser->setTokenHandler( m_xTokenHandler );
     m_xParser->parseStream( structSource );
 }
 


More information about the Libreoffice-commits mailing list