[Libreoffice-commits] core.git: 2 commits - oox/source sax/source
Michael Stahl
mstahl at redhat.com
Fri Jul 3 15:23:56 PDT 2015
oox/source/docprop/docprophandler.cxx | 25 +++++
oox/source/docprop/docprophandler.hxx | 1
sax/source/fastparser/fastparser.cxx | 6 -
sax/source/tools/fastserializer.cxx | 155 +++++++++++++++++++++++++++++++++-
sax/source/tools/fastserializer.hxx | 9 +
5 files changed, 190 insertions(+), 6 deletions(-)
New commits:
commit 3c45bfb0cabf206f6217f1de9eb5cfa12b78e46f
Author: Michael Stahl <mstahl at redhat.com>
Date: Fri Jul 3 23:49:08 2015 +0200
sax: FastSaxSerializer well-formed element test in presence of ...
... the maMarkStack, which causes the order of calls to
startFastElement()/endFastElement() to differ from the order of the tags
that are written into the output.
This is an attempt to improve the assertions, but if an assertion fails
it's generally not obvious where the problem actually is since the
unpredictable order may cause the problem to be detected and reported
much later than its root cause.
Let's see if this finds any new problems in export testing.
Change-Id: I97699cc8ef9b18ea9f4f221d5210134feecf0336
diff --git a/sax/source/tools/fastserializer.cxx b/sax/source/tools/fastserializer.cxx
index 10e7ab3..a2ba5c15 100644
--- a/sax/source/tools/fastserializer.cxx
+++ b/sax/source/tools/fastserializer.cxx
@@ -196,7 +196,10 @@ namespace sax_fastparser {
}
#ifdef DBG_UTIL
- m_DebugStartedElements.push(Element);
+ if (mbMarkStackEmpty)
+ m_DebugStartedElements.push(Element);
+ else
+ maMarkStack.top()->m_DebugStartedElements.push_back(Element);
#endif
writeBytes(sOpeningBracket, N_CHARS(sOpeningBracket));
@@ -213,10 +216,30 @@ namespace sax_fastparser {
void FastSaxSerializer::endFastElement( ::sal_Int32 Element )
{
#ifdef DBG_UTIL
- assert(!m_DebugStartedElements.empty());
// Well-formedness constraint: Element Type Match
- assert(Element == m_DebugStartedElements.top());
- m_DebugStartedElements.pop();
+ if (mbMarkStackEmpty)
+ {
+ assert(!m_DebugStartedElements.empty());
+ assert(Element == m_DebugStartedElements.top());
+ m_DebugStartedElements.pop();
+ }
+ else
+ {
+ if (dynamic_cast<ForSort*>(maMarkStack.top().get()))
+ {
+ // Sort is always well-formed fragment
+ assert(!maMarkStack.top()->m_DebugStartedElements.empty());
+ }
+ if (maMarkStack.top()->m_DebugStartedElements.empty())
+ {
+ maMarkStack.top()->m_DebugEndedElements.push_back(Element);
+ }
+ else
+ {
+ assert(Element == maMarkStack.top()->m_DebugStartedElements.back());
+ maMarkStack.top()->m_DebugStartedElements.pop_back();
+ }
+ }
#endif
writeBytes(sOpeningBracketAndSlash, N_CHARS(sOpeningBracketAndSlash));
@@ -324,17 +347,98 @@ namespace sax_fastparser {
mbMarkStackEmpty = false;
}
+#ifdef DBG_UTIL
+ static void lcl_DebugMergeAppend(
+ std::deque<sal_Int32> & rLeftEndedElements,
+ std::deque<sal_Int32> & rLeftStartedElements,
+ std::deque<sal_Int32> & rRightEndedElements,
+ std::deque<sal_Int32> & rRightStartedElements)
+ {
+ while (!rRightEndedElements.empty())
+ {
+ if (rLeftStartedElements.empty())
+ {
+ rLeftEndedElements.push_back(rRightEndedElements.front());
+ }
+ else
+ {
+ assert(rLeftStartedElements.back() == rRightEndedElements.front());
+ rLeftStartedElements.pop_back();
+ }
+ rRightEndedElements.pop_front();
+ }
+ while (!rRightStartedElements.empty())
+ {
+ rLeftStartedElements.push_back(rRightStartedElements.front());
+ rRightStartedElements.pop_front();
+ }
+ }
+
+ static void lcl_DebugMergePrepend(
+ std::deque<sal_Int32> & rLeftEndedElements,
+ std::deque<sal_Int32> & rLeftStartedElements,
+ std::deque<sal_Int32> & rRightEndedElements,
+ std::deque<sal_Int32> & rRightStartedElements)
+ {
+ while (!rLeftStartedElements.empty())
+ {
+ if (rRightEndedElements.empty())
+ {
+ rRightStartedElements.push_front(rLeftStartedElements.back());
+ }
+ else
+ {
+ assert(rRightEndedElements.front() == rLeftStartedElements.back());
+ rRightEndedElements.pop_front();
+ }
+ rLeftStartedElements.pop_back();
+ }
+ while (!rLeftEndedElements.empty())
+ {
+ rRightEndedElements.push_front(rLeftEndedElements.back());
+ rLeftEndedElements.pop_back();
+ }
+ }
+#endif
+
void FastSaxSerializer::mergeTopMarks( sax_fastparser::MergeMarksEnum eMergeType )
{
SAL_WARN_IF(mbMarkStackEmpty, "sax", "Empty mark stack - nothing to merge");
if ( mbMarkStackEmpty )
return;
+#ifdef DBG_UTIL
+ if (dynamic_cast<ForSort*>(maMarkStack.top().get()))
+ {
+ // Sort is always well-formed fragment
+ assert(maMarkStack.top()->m_DebugStartedElements.empty());
+ assert(maMarkStack.top()->m_DebugEndedElements.empty());
+ }
+ lcl_DebugMergeAppend(
+ maMarkStack.top()->m_DebugEndedElements,
+ maMarkStack.top()->m_DebugStartedElements,
+ maMarkStack.top()->m_DebugPostponedEndedElements,
+ maMarkStack.top()->m_DebugPostponedStartedElements);
+#endif
+
// flush, so that we get everything in getData()
maCachedOutputStream.flush();
if ( maMarkStack.size() == 1 && eMergeType != MERGE_MARKS_IGNORE)
{
+#if DBG_UTIL
+ while (!maMarkStack.top()->m_DebugEndedElements.empty())
+ {
+ assert(maMarkStack.top()->m_DebugEndedElements.front() == m_DebugStartedElements.top());
+ maMarkStack.top()->m_DebugEndedElements.pop_front();
+ m_DebugStartedElements.pop();
+ }
+ while (!maMarkStack.top()->m_DebugStartedElements.empty())
+ {
+ m_DebugStartedElements.push(maMarkStack.top()->m_DebugStartedElements.front());
+ maMarkStack.top()->m_DebugStartedElements.pop_front();
+ }
+#endif
Sequence<sal_Int8> aSeq( maMarkStack.top()->getData() );
maMarkStack.pop();
mbMarkStackEmpty = true;
@@ -343,8 +447,51 @@ namespace sax_fastparser {
return;
}
+#if DBG_UTIL
+ ::std::deque<sal_Int32> topDebugStartedElements(maMarkStack.top()->m_DebugStartedElements);
+ ::std::deque<sal_Int32> topDebugEndedElements(maMarkStack.top()->m_DebugEndedElements);
+#endif
const Int8Sequence aMerge( maMarkStack.top()->getData() );
maMarkStack.pop();
+#if DBG_UTIL
+ switch (eMergeType)
+ {
+ case MERGE_MARKS_APPEND:
+ lcl_DebugMergeAppend(
+ maMarkStack.top()->m_DebugEndedElements,
+ maMarkStack.top()->m_DebugStartedElements,
+ topDebugEndedElements,
+ topDebugStartedElements);
+ break;
+ case MERGE_MARKS_PREPEND:
+ if (dynamic_cast<ForSort*>(maMarkStack.top().get())) // argh...
+ {
+ lcl_DebugMergeAppend(
+ maMarkStack.top()->m_DebugEndedElements,
+ maMarkStack.top()->m_DebugStartedElements,
+ topDebugEndedElements,
+ topDebugStartedElements);
+ }
+ else
+ {
+ lcl_DebugMergePrepend(
+ topDebugEndedElements,
+ topDebugStartedElements,
+ maMarkStack.top()->m_DebugEndedElements,
+ maMarkStack.top()->m_DebugStartedElements);
+ }
+ break;
+ case MERGE_MARKS_POSTPONE:
+ lcl_DebugMergeAppend(
+ maMarkStack.top()->m_DebugPostponedEndedElements,
+ maMarkStack.top()->m_DebugPostponedStartedElements,
+ topDebugEndedElements,
+ topDebugStartedElements);
+ break;
+ case MERGE_MARKS_IGNORE:
+ break;
+ }
+#endif
if (maMarkStack.empty())
{
mbMarkStackEmpty = true;
diff --git a/sax/source/tools/fastserializer.hxx b/sax/source/tools/fastserializer.hxx
index 95e631c..4dc786b 100644
--- a/sax/source/tools/fastserializer.hxx
+++ b/sax/source/tools/fastserializer.hxx
@@ -161,6 +161,15 @@ private:
Int8Sequence maPostponed;
public:
+#ifdef DBG_UTIL
+ // pending close tags, followed by pending open tags
+ ::std::deque<sal_Int32> m_DebugEndedElements;
+ ::std::deque<sal_Int32> m_DebugStartedElements;
+ // ... and another buffer for maPostponed ...
+ ::std::deque<sal_Int32> m_DebugPostponedEndedElements;
+ ::std::deque<sal_Int32> m_DebugPostponedStartedElements;
+#endif
+
ForMerge() : maData(), maPostponed() {}
virtual ~ForMerge() {}
commit cf17befa261fd64f95bb4db932ce4d7e149f4a72
Author: Michael Stahl <mstahl at redhat.com>
Date: Fri Jul 3 22:28:18 2015 +0200
tdf#91378: sax, oox: avoid sending empty strings to character callbacks
This reverts the changes in FastSaxParserImpl from commit
16e8ffbd5ec1fe7b81835ea6584547669d55d751 and instead fixes the problem
of inserting string properties with empty value locally in
OOXMLDocPropHandler.
This change was not wrong in any obvious way, but it turns out there is
one doc rhbz583386-4.docx that, when imported with this change and
exported to DOCX again, results in a non-well-formed document because of
some weird SDT stuff.
That problem is rather baffling, but unfortunately the
DocxAttributeOutput usage of FastSaxSerializer::mark() makes the DOCX
export rather un-debuggable, so avoid that problem by reverting the import
change for now.
Change-Id: I0d874cbfe82d4f15d58b50116dda152341bdf7b0
diff --git a/oox/source/docprop/docprophandler.cxx b/oox/source/docprop/docprophandler.cxx
index 1a5000b..c5d89b9 100644
--- a/oox/source/docprop/docprophandler.cxx
+++ b/oox/source/docprop/docprophandler.cxx
@@ -45,6 +45,7 @@ OOXMLDocPropHandler::OOXMLDocPropHandler( const uno::Reference< uno::XComponentC
, m_nBlock( 0 )
, m_nType( 0 )
, m_nInBlock( 0 )
+ , m_CustomStringPropertyState(NONE)
{
if ( !xContext.is() || !rDocProp.is() )
throw uno::RuntimeException();
@@ -61,6 +62,7 @@ void OOXMLDocPropHandler::InitNew()
m_aCustomPropertyName.clear();
m_nType = 0;
m_nInBlock = 0;
+ m_CustomStringPropertyState = NONE;
}
void OOXMLDocPropHandler::AddCustomProperty( const uno::Any& aAny )
@@ -345,7 +347,29 @@ void SAL_CALL OOXMLDocPropHandler::endFastElement( ::sal_Int32 )
m_aCustomPropertyName.clear();
}
else if ( m_nInBlock == 2 )
+ {
+ if ( m_nState == CUSTPR_TOKEN(Properties)
+ && m_nBlock == CUSTPR_TOKEN(property))
+ {
+ switch (m_nType)
+ {
+ case VT_TOKEN(bstr):
+ case VT_TOKEN(lpstr):
+ case VT_TOKEN(lpwstr):
+ if (!m_aCustomPropertyName.isEmpty() &&
+ INSERTED != m_CustomStringPropertyState)
+ {
+ // the property has string type, so it is valid
+ // even with an empty value - characters() has
+ // not been called in that case
+ AddCustomProperty(uno::makeAny(OUString()));
+ }
+ break;
+ }
+ }
+ m_CustomStringPropertyState = NONE;
m_nType = 0;
+ }
}
}
@@ -595,6 +619,7 @@ void SAL_CALL OOXMLDocPropHandler::characters( const OUString& aChars )
case VT_TOKEN( lpwstr ):
// the property has string type
AddCustomProperty( uno::makeAny( AttributeConversion::decodeXString( aChars ) ) );
+ m_CustomStringPropertyState = INSERTED;
break;
case VT_TOKEN( date ):
diff --git a/oox/source/docprop/docprophandler.hxx b/oox/source/docprop/docprophandler.hxx
index f0d8049..daabc03 100644
--- a/oox/source/docprop/docprophandler.hxx
+++ b/oox/source/docprop/docprophandler.hxx
@@ -50,6 +50,7 @@ class OOXMLDocPropHandler : public ::cppu::WeakImplHelper1< ::com::sun::star::xm
sal_Int32 m_nInBlock;
+ enum { NONE, INSERTED } m_CustomStringPropertyState;
OUString m_aCustomPropertyName;
public:
diff --git a/sax/source/fastparser/fastparser.cxx b/sax/source/fastparser/fastparser.cxx
index 79ab899..c866618 100644
--- a/sax/source/fastparser/fastparser.cxx
+++ b/sax/source/fastparser/fastparser.cxx
@@ -1037,7 +1037,8 @@ void FastSaxParserImpl::parse()
void FastSaxParserImpl::callbackStartElement(const xmlChar *localName , const xmlChar* prefix, const xmlChar* URI,
int numNamespaces, const xmlChar** namespaces, int numAttributes, int /*defaultedAttributes*/, const xmlChar **attributes)
{
- sendPendingCharacters();
+ if (!pendingCharacters.isEmpty())
+ sendPendingCharacters();
Entity& rEntity = getEntity();
if( rEntity.maNamespaceCount.empty() )
{
@@ -1144,7 +1145,8 @@ void FastSaxParserImpl::callbackStartElement(const xmlChar *localName , const xm
void FastSaxParserImpl::callbackEndElement( const xmlChar*, const xmlChar*, const xmlChar* )
{
- sendPendingCharacters();
+ if (!pendingCharacters.isEmpty())
+ sendPendingCharacters();
Entity& rEntity = getEntity();
SAL_WARN_IF(rEntity.maNamespaceCount.empty(), "sax", "Empty NamespaceCount");
if( !rEntity.maNamespaceCount.empty() )
More information about the Libreoffice-commits
mailing list