[Libreoffice-commits] core.git: Branch 'distro/collabora/cp-5.3' - sw/qa writerfilter/source
Mike Kaganski
mike.kaganski at collabora.com
Tue Jun 27 14:39:55 UTC 2017
sw/qa/extras/ooxmlimport/data/tdf108714.docx |binary
sw/qa/extras/ooxmlimport/ooxmlimport.cxx | 46 ++++++++++++++++++
writerfilter/source/ooxml/OOXMLFastContextHandler.cxx | 13 +++++
writerfilter/source/ooxml/OOXMLFastContextHandler.hxx | 1
writerfilter/source/ooxml/OOXMLParserState.cxx | 17 ++++++
writerfilter/source/ooxml/OOXMLParserState.hxx | 4 +
writerfilter/source/ooxml/factoryimpl_ns.py | 2
writerfilter/source/ooxml/model.xml | 18 +++++++
8 files changed, 100 insertions(+), 1 deletion(-)
New commits:
commit 304dcd15d5ea5bac750249d5688372123565f9df
Author: Mike Kaganski <mike.kaganski at collabora.com>
Date: Fri Jun 23 14:48:03 2017 +0300
tdf#108714: allow <w:br> as direct child of <w:body>
LibreOffice doesn't accept <w:br> element as a child of <w:body>.
ECMA-376-1:2016 17.3.3.1 describes br as element of a run content,
and points to CT_Br in §A.1.
CT_Br may appear only as part of EG_RunInnerContent.
In turn, EG_RunInnerContent may appear only inside CT_R.
So, using <w:br> outside of <w:r> produces ill-formed OOXML.
Open XML SDK 2.5 Productivity Tool for Microsoft Office confirms that,
showing OpenXmlUnknownElement error.
However, Word accepts it as direct child of <w:body>. It behaves as if
the <w:br> were used as first element in first run of the following
<w:p> (thus creating page break after next paragraph).
Another Word bug that provokes third-parties to create ill-formed
documents, and requires LibreOffice to be bug-to-bug compatible.
This commit makes the following changes:
1. Registers a dedicated complex type CT_Br_OutOfOrder to handle those
unusual breaks, with corresponding handler function.
2. In the handler function, saves the gathered property set to parser
state to use later in next paragraph group handler.
This reproduces Word behaviour.
Change-Id: I5df6927e2de9266b58f87807319ad1c4977e45a7
Reviewed-on: https://gerrit.libreoffice.org/39168
Tested-by: Jenkins <ci at libreoffice.org>
Reviewed-by: Mike Kaganski <mike.kaganski at collabora.com>
(cherry picked from commit a4a1467bc47b81ad68ecad0d5e2e163670582919)
Reviewed-on: https://gerrit.libreoffice.org/39303
Tested-by: Mike Kaganski <mike.kaganski at collabora.com>
diff --git a/sw/qa/extras/ooxmlimport/data/tdf108714.docx b/sw/qa/extras/ooxmlimport/data/tdf108714.docx
new file mode 100644
index 000000000000..e564d44a648b
Binary files /dev/null and b/sw/qa/extras/ooxmlimport/data/tdf108714.docx differ
diff --git a/sw/qa/extras/ooxmlimport/ooxmlimport.cxx b/sw/qa/extras/ooxmlimport/ooxmlimport.cxx
index ec80bb6cb561..2a22195fd17b 100644
--- a/sw/qa/extras/ooxmlimport/ooxmlimport.cxx
+++ b/sw/qa/extras/ooxmlimport/ooxmlimport.cxx
@@ -1398,6 +1398,52 @@ DECLARE_OOXMLIMPORT_TEST(testTdf108408, "tdf108408.docx")
}
+DECLARE_OOXMLIMPORT_TEST(testTdf108714, "tdf108714.docx")
+{
+ CPPUNIT_ASSERT_EQUAL(4, getParagraphs());
+ CPPUNIT_ASSERT_EQUAL_MESSAGE("Page break is absent - we lost bug-to-bug compatibility with Word", 3, getPages());
+
+ // The second (empty) paragraph must be at first page, despite the <w:br> element was before it.
+ // That's because Word treats such break as first element in first run of following paragraph:
+ //
+ // <w:br w:type="page"/>
+ // <w:p>
+ // <w:r>
+ // <w:t/>
+ // </w:r>
+ // </w:p>
+ //
+ // is equal to
+ //
+ // <w:p>
+ // <w:r>
+ // <w:br w:type="page"/>
+ // </w:r>
+ // </w:p>
+ //
+ // which emits page break after that empty paragraph.
+
+ uno::Reference< text::XTextRange > paragraph = getParagraph(1);
+ CPPUNIT_ASSERT_EQUAL(OUString("Paragraph 1"), paragraph->getString());
+ style::BreakType breakType = getProperty<style::BreakType>(paragraph, "BreakType");
+ CPPUNIT_ASSERT_EQUAL(style::BreakType_NONE, breakType);
+
+ paragraph = getParagraph(2);
+ CPPUNIT_ASSERT_EQUAL(OUString(), paragraph->getString());
+ breakType = getProperty<style::BreakType>(paragraph, "BreakType");
+ CPPUNIT_ASSERT_EQUAL(style::BreakType_NONE, breakType);
+
+ paragraph = getParagraph(3);
+ CPPUNIT_ASSERT_EQUAL(OUString("Paragraph 2"), paragraph->getString());
+ breakType = getProperty<style::BreakType>(paragraph, "BreakType");
+ CPPUNIT_ASSERT_EQUAL(style::BreakType_PAGE_BEFORE, breakType);
+
+ paragraph = getParagraph(4);
+ CPPUNIT_ASSERT_EQUAL(OUString("Paragraph 3"), paragraph->getString());
+ breakType = getProperty<style::BreakType>(paragraph, "BreakType");
+ CPPUNIT_ASSERT_EQUAL(style::BreakType_PAGE_BEFORE, breakType);
+}
+
// tests should only be added to ooxmlIMPORT *if* they fail round-tripping in ooxmlEXPORT
CPPUNIT_PLUGIN_IMPLEMENT();
diff --git a/writerfilter/source/ooxml/OOXMLFastContextHandler.cxx b/writerfilter/source/ooxml/OOXMLFastContextHandler.cxx
index bb5ff5fa98fb..cd79f2c0454b 100644
--- a/writerfilter/source/ooxml/OOXMLFastContextHandler.cxx
+++ b/writerfilter/source/ooxml/OOXMLFastContextHandler.cxx
@@ -397,6 +397,10 @@ void OOXMLFastContextHandler::startParagraphGroup()
{
mpStream->startParagraphGroup();
mpParserState->setInParagraphGroup(true);
+
+ // tdf#108714 : if we have a postponed break information,
+ // then apply it now, before any other paragraph content.
+ mpParserState->resolvePostponedBreak(*mpStream);
}
}
}
@@ -1061,6 +1065,15 @@ void OOXMLFastContextHandlerProperties::handleBreak()
}
}
+// tdf#108714 : allow <w:br> at block level (despite this is illegal according to ECMA-376-1:2016)
+void OOXMLFastContextHandlerProperties::handleOutOfOrderBreak()
+{
+ if(isForwardEvents())
+ {
+ mpParserState->setPostponedBreak(getPropertySet());
+ }
+}
+
void OOXMLFastContextHandlerProperties::handleOLE()
{
OOXMLOLEHandler aOLEHandler(this);
diff --git a/writerfilter/source/ooxml/OOXMLFastContextHandler.hxx b/writerfilter/source/ooxml/OOXMLFastContextHandler.hxx
index f986c76029ae..e4e3e563fad5 100644
--- a/writerfilter/source/ooxml/OOXMLFastContextHandler.hxx
+++ b/writerfilter/source/ooxml/OOXMLFastContextHandler.hxx
@@ -278,6 +278,7 @@ public:
void handleComment();
void handlePicture();
void handleBreak();
+ void handleOutOfOrderBreak();
void handleOLE();
void handleFontRel();
void handleHyperlinkURL();
diff --git a/writerfilter/source/ooxml/OOXMLParserState.cxx b/writerfilter/source/ooxml/OOXMLParserState.cxx
index 71d0290b707a..a655488e3194 100644
--- a/writerfilter/source/ooxml/OOXMLParserState.cxx
+++ b/writerfilter/source/ooxml/OOXMLParserState.cxx
@@ -20,6 +20,7 @@
#include <stdio.h>
#include <iostream>
#include "OOXMLParserState.hxx"
+#include "Handler.hxx"
namespace writerfilter {
namespace ooxml
@@ -210,6 +211,22 @@ void OOXMLParserState::setTableProperties(const OOXMLPropertySet::Pointer_t& pPr
}
}
+// tdf#108714
+void OOXMLParserState::resolvePostponedBreak(Stream & rStream)
+{
+ if (mpPostponedBreak)
+ {
+ OOXMLBreakHandler aBreakHandler(rStream);
+ mpPostponedBreak->resolve(aBreakHandler);
+ mpPostponedBreak.reset();
+ }
+}
+
+void OOXMLParserState::setPostponedBreak(const OOXMLPropertySet::Pointer_t & pProps)
+{
+ mpPostponedBreak = pProps;
+}
+
void OOXMLParserState::startTable()
{
OOXMLPropertySet::Pointer_t pCellProps;
diff --git a/writerfilter/source/ooxml/OOXMLParserState.hxx b/writerfilter/source/ooxml/OOXMLParserState.hxx
index 0ba0079d1653..d328b07b2835 100644
--- a/writerfilter/source/ooxml/OOXMLParserState.hxx
+++ b/writerfilter/source/ooxml/OOXMLParserState.hxx
@@ -59,6 +59,7 @@ class OOXMLParserState final
bool savedInCharacterGroup;
bool savedLastParagraphInSection;
std::vector<SavedAlternateState> maSavedAlternateStates;
+ OOXMLPropertySet::Pointer_t mpPostponedBreak;
public:
typedef std::shared_ptr<OOXMLParserState> Pointer_t;
@@ -102,6 +103,9 @@ public:
void setRowProperties(const OOXMLPropertySet::Pointer_t& pProps);
void resolveTableProperties(Stream & rStream);
void setTableProperties(const OOXMLPropertySet::Pointer_t& pProps);
+ // tdf#108714
+ void resolvePostponedBreak(Stream & rStream);
+ void setPostponedBreak(const OOXMLPropertySet::Pointer_t& pProps);
void startTable();
void endTable();
diff --git a/writerfilter/source/ooxml/factoryimpl_ns.py b/writerfilter/source/ooxml/factoryimpl_ns.py
index cee40a75bd45..74ee6e8e3d25 100644
--- a/writerfilter/source/ooxml/factoryimpl_ns.py
+++ b/writerfilter/source/ooxml/factoryimpl_ns.py
@@ -428,7 +428,7 @@ def factoryChooseAction(actionNode):
ret.append(" {")
extra_space = " "
- if actionNode.getAttribute("action") in ("handleXNotes", "handleHdrFtr", "handleComment", "handlePicture", "handleBreak", "handleOLE", "handleFontRel", "handleHyperlinkURL"):
+ if actionNode.getAttribute("action") in ("handleXNotes", "handleHdrFtr", "handleComment", "handlePicture", "handleBreak", "handleOutOfOrderBreak", "handleOLE", "handleFontRel", "handleHyperlinkURL"):
ret.append(" %sif (OOXMLFastContextHandlerProperties* pProperties = dynamic_cast<OOXMLFastContextHandlerProperties*>(pHandler))" % extra_space)
ret.append(" %s pProperties->%s();" % (extra_space, actionNode.getAttribute("action")))
elif actionNode.getAttribute("action") == "propagateCharacterPropertiesAsSet":
diff --git a/writerfilter/source/ooxml/model.xml b/writerfilter/source/ooxml/model.xml
index d132f03ed72f..5318d7d3c819 100644
--- a/writerfilter/source/ooxml/model.xml
+++ b/writerfilter/source/ooxml/model.xml
@@ -13224,6 +13224,14 @@
<ref name="ST_BrClear"/>
</attribute>
</define>
+ <define name="CT_Br_OutOfOrder">
+ <attribute name="type">
+ <ref name="ST_BrType"/>
+ </attribute>
+ <attribute name="clear">
+ <ref name="ST_BrClear"/>
+ </attribute>
+ </define>
<define name="ST_PTabAlignment">
<choice>
<!-- Left -->
@@ -13963,6 +13971,11 @@
<element name="tbl">
<ref name="CT_Tbl"/>
</element>
+ <!-- tdf#108714 : allow <w:br> at block level (despite this is illegal according to ECMA-376-1:2016) - bug-to-bug compatibility with Word -->
+ <element name="br">
+ <ref name="CT_Br_OutOfOrder"/>
+ </element>
+ <!-- end tdf#108714 -->
<ref name="EG_RunLevelElts"/>
</choice>
</define>
@@ -17842,6 +17855,11 @@
<attribute name="clear" tokenid="ooxml:CT_Br_clear"/>
<action name="end" action="handleBreak"/>
</resource>
+ <resource name="CT_Br_OutOfOrder" resource="Properties">
+ <attribute name="type" tokenid="ooxml:CT_Br_type"/>
+ <attribute name="clear" tokenid="ooxml:CT_Br_clear"/>
+ <action name="end" action="handleOutOfOrderBreak"/>
+ </resource>
<resource name="ST_PTabAlignment" resource="List">
<value tokenid="ooxml:Value_ST_PTabAlignment_left">left</value>
<value tokenid="ooxml:Value_ST_PTabAlignment_center">center</value>
More information about the Libreoffice-commits
mailing list