[Libreoffice-commits] core.git: 2 commits - oox/source sax/source

Michael Stahl mstahl at redhat.com
Fri Jul 3 15:23:56 PDT 2015


 oox/source/docprop/docprophandler.cxx |   25 +++++
 oox/source/docprop/docprophandler.hxx |    1 
 sax/source/fastparser/fastparser.cxx  |    6 -
 sax/source/tools/fastserializer.cxx   |  155 +++++++++++++++++++++++++++++++++-
 sax/source/tools/fastserializer.hxx   |    9 +
 5 files changed, 190 insertions(+), 6 deletions(-)

New commits:
commit 3c45bfb0cabf206f6217f1de9eb5cfa12b78e46f
Author: Michael Stahl <mstahl at redhat.com>
Date:   Fri Jul 3 23:49:08 2015 +0200

    sax: FastSaxSerializer well-formed element test in presence of ...
    
    ... the maMarkStack, which causes the order of calls to
    startFastElement()/endFastElement() to differ from the order of the tags
    that are written into the output.
    
    This is an attempt to improve the assertions, but if an assertion fails
    it's generally not obvious where the problem actually is since the
    unpredictable order may cause the problem to be detected and reported
    much later than its root cause.
    
    Let's see if this finds any new problems in export testing.
    
    Change-Id: I97699cc8ef9b18ea9f4f221d5210134feecf0336

diff --git a/sax/source/tools/fastserializer.cxx b/sax/source/tools/fastserializer.cxx
index 10e7ab3..a2ba5c15 100644
--- a/sax/source/tools/fastserializer.cxx
+++ b/sax/source/tools/fastserializer.cxx
@@ -196,7 +196,10 @@ namespace sax_fastparser {
         }
 
 #ifdef DBG_UTIL
-        m_DebugStartedElements.push(Element);
+        if (mbMarkStackEmpty)
+            m_DebugStartedElements.push(Element);
+        else
+            maMarkStack.top()->m_DebugStartedElements.push_back(Element);
 #endif
 
         writeBytes(sOpeningBracket, N_CHARS(sOpeningBracket));
@@ -213,10 +216,30 @@ namespace sax_fastparser {
     void FastSaxSerializer::endFastElement( ::sal_Int32 Element )
     {
 #ifdef DBG_UTIL
-        assert(!m_DebugStartedElements.empty());
         // Well-formedness constraint: Element Type Match
-        assert(Element == m_DebugStartedElements.top());
-        m_DebugStartedElements.pop();
+        if (mbMarkStackEmpty)
+        {
+            assert(!m_DebugStartedElements.empty());
+            assert(Element == m_DebugStartedElements.top());
+            m_DebugStartedElements.pop();
+        }
+        else
+        {
+            if (dynamic_cast<ForSort*>(maMarkStack.top().get()))
+            {
+                // Sort is always well-formed fragment
+                assert(!maMarkStack.top()->m_DebugStartedElements.empty());
+            }
+            if (maMarkStack.top()->m_DebugStartedElements.empty())
+            {
+                maMarkStack.top()->m_DebugEndedElements.push_back(Element);
+            }
+            else
+            {
+                assert(Element == maMarkStack.top()->m_DebugStartedElements.back());
+                maMarkStack.top()->m_DebugStartedElements.pop_back();
+            }
+        }
 #endif
 
         writeBytes(sOpeningBracketAndSlash, N_CHARS(sOpeningBracketAndSlash));
@@ -324,17 +347,98 @@ namespace sax_fastparser {
         mbMarkStackEmpty = false;
     }
 
+#ifdef DBG_UTIL
+    static void lcl_DebugMergeAppend(
+            std::deque<sal_Int32> & rLeftEndedElements,
+            std::deque<sal_Int32> & rLeftStartedElements,
+            std::deque<sal_Int32> & rRightEndedElements,
+            std::deque<sal_Int32> & rRightStartedElements)
+    {
+        while (!rRightEndedElements.empty())
+        {
+            if (rLeftStartedElements.empty())
+            {
+                rLeftEndedElements.push_back(rRightEndedElements.front());
+            }
+            else
+            {
+                assert(rLeftStartedElements.back() == rRightEndedElements.front());
+                rLeftStartedElements.pop_back();
+            }
+            rRightEndedElements.pop_front();
+        }
+        while (!rRightStartedElements.empty())
+        {
+            rLeftStartedElements.push_back(rRightStartedElements.front());
+            rRightStartedElements.pop_front();
+        }
+    }
+
+    static void lcl_DebugMergePrepend(
+            std::deque<sal_Int32> & rLeftEndedElements,
+            std::deque<sal_Int32> & rLeftStartedElements,
+            std::deque<sal_Int32> & rRightEndedElements,
+            std::deque<sal_Int32> & rRightStartedElements)
+    {
+        while (!rLeftStartedElements.empty())
+        {
+            if (rRightEndedElements.empty())
+            {
+                rRightStartedElements.push_front(rLeftStartedElements.back());
+            }
+            else
+            {
+                assert(rRightEndedElements.front() == rLeftStartedElements.back());
+                rRightEndedElements.pop_front();
+            }
+            rLeftStartedElements.pop_back();
+        }
+        while (!rLeftEndedElements.empty())
+        {
+            rRightEndedElements.push_front(rLeftEndedElements.back());
+            rLeftEndedElements.pop_back();
+        }
+    }
+#endif
+
     void FastSaxSerializer::mergeTopMarks( sax_fastparser::MergeMarksEnum eMergeType )
     {
         SAL_WARN_IF(mbMarkStackEmpty, "sax", "Empty mark stack - nothing to merge");
         if ( mbMarkStackEmpty )
             return;
 
+#ifdef DBG_UTIL
+        if (dynamic_cast<ForSort*>(maMarkStack.top().get()))
+        {
+            // Sort is always well-formed fragment
+            assert(maMarkStack.top()->m_DebugStartedElements.empty());
+            assert(maMarkStack.top()->m_DebugEndedElements.empty());
+        }
+        lcl_DebugMergeAppend(
+            maMarkStack.top()->m_DebugEndedElements,
+            maMarkStack.top()->m_DebugStartedElements,
+            maMarkStack.top()->m_DebugPostponedEndedElements,
+            maMarkStack.top()->m_DebugPostponedStartedElements);
+#endif
+
         // flush, so that we get everything in getData()
         maCachedOutputStream.flush();
 
         if ( maMarkStack.size() == 1  && eMergeType != MERGE_MARKS_IGNORE)
         {
+#if DBG_UTIL
+            while (!maMarkStack.top()->m_DebugEndedElements.empty())
+            {
+                assert(maMarkStack.top()->m_DebugEndedElements.front() == m_DebugStartedElements.top());
+                maMarkStack.top()->m_DebugEndedElements.pop_front();
+                m_DebugStartedElements.pop();
+            }
+            while (!maMarkStack.top()->m_DebugStartedElements.empty())
+            {
+                m_DebugStartedElements.push(maMarkStack.top()->m_DebugStartedElements.front());
+                maMarkStack.top()->m_DebugStartedElements.pop_front();
+            }
+#endif
             Sequence<sal_Int8> aSeq( maMarkStack.top()->getData() );
             maMarkStack.pop();
             mbMarkStackEmpty = true;
@@ -343,8 +447,51 @@ namespace sax_fastparser {
             return;
         }
 
+#if DBG_UTIL
+        ::std::deque<sal_Int32> topDebugStartedElements(maMarkStack.top()->m_DebugStartedElements);
+        ::std::deque<sal_Int32> topDebugEndedElements(maMarkStack.top()->m_DebugEndedElements);
+#endif
         const Int8Sequence aMerge( maMarkStack.top()->getData() );
         maMarkStack.pop();
+#if DBG_UTIL
+        switch (eMergeType)
+        {
+            case MERGE_MARKS_APPEND:
+                lcl_DebugMergeAppend(
+                    maMarkStack.top()->m_DebugEndedElements,
+                    maMarkStack.top()->m_DebugStartedElements,
+                    topDebugEndedElements,
+                    topDebugStartedElements);
+                break;
+            case MERGE_MARKS_PREPEND:
+                if (dynamic_cast<ForSort*>(maMarkStack.top().get())) // argh...
+                {
+                    lcl_DebugMergeAppend(
+                        maMarkStack.top()->m_DebugEndedElements,
+                        maMarkStack.top()->m_DebugStartedElements,
+                        topDebugEndedElements,
+                        topDebugStartedElements);
+                }
+                else
+                {
+                    lcl_DebugMergePrepend(
+                        topDebugEndedElements,
+                        topDebugStartedElements,
+                        maMarkStack.top()->m_DebugEndedElements,
+                        maMarkStack.top()->m_DebugStartedElements);
+                }
+                break;
+            case MERGE_MARKS_POSTPONE:
+                lcl_DebugMergeAppend(
+                    maMarkStack.top()->m_DebugPostponedEndedElements,
+                    maMarkStack.top()->m_DebugPostponedStartedElements,
+                    topDebugEndedElements,
+                    topDebugStartedElements);
+                break;
+            case MERGE_MARKS_IGNORE:
+                break;
+        }
+#endif
         if (maMarkStack.empty())
         {
             mbMarkStackEmpty = true;
diff --git a/sax/source/tools/fastserializer.hxx b/sax/source/tools/fastserializer.hxx
index 95e631c..4dc786b 100644
--- a/sax/source/tools/fastserializer.hxx
+++ b/sax/source/tools/fastserializer.hxx
@@ -161,6 +161,15 @@ private:
         Int8Sequence maPostponed;
 
     public:
+#ifdef DBG_UTIL
+        // pending close tags, followed by pending open tags
+        ::std::deque<sal_Int32> m_DebugEndedElements;
+        ::std::deque<sal_Int32> m_DebugStartedElements;
+        // ... and another buffer for maPostponed ...
+        ::std::deque<sal_Int32> m_DebugPostponedEndedElements;
+        ::std::deque<sal_Int32> m_DebugPostponedStartedElements;
+#endif
+
         ForMerge() : maData(), maPostponed() {}
         virtual ~ForMerge() {}
 
commit cf17befa261fd64f95bb4db932ce4d7e149f4a72
Author: Michael Stahl <mstahl at redhat.com>
Date:   Fri Jul 3 22:28:18 2015 +0200

    tdf#91378: sax, oox: avoid sending empty strings to character callbacks
    
    This reverts the changes in FastSaxParserImpl from commit
    16e8ffbd5ec1fe7b81835ea6584547669d55d751 and instead fixes the problem
    of inserting string properties with empty value locally in
    OOXMLDocPropHandler.
    
    This change was not wrong in any obvious way, but it turns out there is
    one doc rhbz583386-4.docx that, when imported with this change and
    exported to DOCX again, results in a non-well-formed document because of
    some weird SDT stuff.
    
    That problem is rather baffling, but unfortunately the
    DocxAttributeOutput usage of FastSaxSerializer::mark() makes the DOCX
    export rather un-debuggable, so avoid that problem by reverting the import
    change for now.
    
    Change-Id: I0d874cbfe82d4f15d58b50116dda152341bdf7b0

diff --git a/oox/source/docprop/docprophandler.cxx b/oox/source/docprop/docprophandler.cxx
index 1a5000b..c5d89b9 100644
--- a/oox/source/docprop/docprophandler.cxx
+++ b/oox/source/docprop/docprophandler.cxx
@@ -45,6 +45,7 @@ OOXMLDocPropHandler::OOXMLDocPropHandler( const uno::Reference< uno::XComponentC
     , m_nBlock( 0 )
     , m_nType( 0 )
     , m_nInBlock( 0 )
+    , m_CustomStringPropertyState(NONE)
 {
     if ( !xContext.is() || !rDocProp.is() )
         throw uno::RuntimeException();
@@ -61,6 +62,7 @@ void OOXMLDocPropHandler::InitNew()
     m_aCustomPropertyName.clear();
     m_nType = 0;
     m_nInBlock = 0;
+    m_CustomStringPropertyState = NONE;
 }
 
 void OOXMLDocPropHandler::AddCustomProperty( const uno::Any& aAny )
@@ -345,7 +347,29 @@ void SAL_CALL OOXMLDocPropHandler::endFastElement( ::sal_Int32 )
             m_aCustomPropertyName.clear();
         }
         else if ( m_nInBlock == 2 )
+        {
+            if (   m_nState == CUSTPR_TOKEN(Properties)
+                && m_nBlock == CUSTPR_TOKEN(property))
+            {
+                switch (m_nType)
+                {
+                    case VT_TOKEN(bstr):
+                    case VT_TOKEN(lpstr):
+                    case VT_TOKEN(lpwstr):
+                        if (!m_aCustomPropertyName.isEmpty() &&
+                            INSERTED != m_CustomStringPropertyState)
+                        {
+                            // the property has string type, so it is valid
+                            // even with an empty value - characters() has
+                            // not been called in that case
+                            AddCustomProperty(uno::makeAny(OUString()));
+                        }
+                    break;
+                }
+            }
+            m_CustomStringPropertyState = NONE;
             m_nType = 0;
+        }
     }
 }
 
@@ -595,6 +619,7 @@ void SAL_CALL OOXMLDocPropHandler::characters( const OUString& aChars )
                     case VT_TOKEN( lpwstr ):
                         // the property has string type
                         AddCustomProperty( uno::makeAny( AttributeConversion::decodeXString( aChars ) ) );
+                        m_CustomStringPropertyState = INSERTED;
                         break;
 
                     case VT_TOKEN( date ):
diff --git a/oox/source/docprop/docprophandler.hxx b/oox/source/docprop/docprophandler.hxx
index f0d8049..daabc03 100644
--- a/oox/source/docprop/docprophandler.hxx
+++ b/oox/source/docprop/docprophandler.hxx
@@ -50,6 +50,7 @@ class OOXMLDocPropHandler : public ::cppu::WeakImplHelper1< ::com::sun::star::xm
 
     sal_Int32 m_nInBlock;
 
+    enum { NONE, INSERTED } m_CustomStringPropertyState;
     OUString m_aCustomPropertyName;
 
 public:
diff --git a/sax/source/fastparser/fastparser.cxx b/sax/source/fastparser/fastparser.cxx
index 79ab899..c866618 100644
--- a/sax/source/fastparser/fastparser.cxx
+++ b/sax/source/fastparser/fastparser.cxx
@@ -1037,7 +1037,8 @@ void FastSaxParserImpl::parse()
 void FastSaxParserImpl::callbackStartElement(const xmlChar *localName , const xmlChar* prefix, const xmlChar* URI,
     int numNamespaces, const xmlChar** namespaces, int numAttributes, int /*defaultedAttributes*/, const xmlChar **attributes)
 {
-    sendPendingCharacters();
+    if (!pendingCharacters.isEmpty())
+        sendPendingCharacters();
     Entity& rEntity = getEntity();
     if( rEntity.maNamespaceCount.empty() )
     {
@@ -1144,7 +1145,8 @@ void FastSaxParserImpl::callbackStartElement(const xmlChar *localName , const xm
 
 void FastSaxParserImpl::callbackEndElement( const xmlChar*, const xmlChar*, const xmlChar* )
 {
-    sendPendingCharacters();
+    if (!pendingCharacters.isEmpty())
+        sendPendingCharacters();
     Entity& rEntity = getEntity();
     SAL_WARN_IF(rEntity.maNamespaceCount.empty(), "sax", "Empty NamespaceCount");
     if( !rEntity.maNamespaceCount.empty() )


More information about the Libreoffice-commits mailing list