[Libreoffice-commits] core.git: Branch 'distro/collabora/cp-5.3' - sw/qa writerfilter/source

Mike Kaganski mike.kaganski at collabora.com
Thu Aug 10 10:28:13 UTC 2017


 sw/qa/extras/ooxmlimport/data/tdf111550.docx          |binary
 sw/qa/extras/ooxmlimport/ooxmlimport.cxx              |   66 ++++++++++++++++++
 writerfilter/source/ooxml/OOXMLFastContextHandler.cxx |   19 +++++
 writerfilter/source/ooxml/OOXMLFastContextHandler.hxx |    4 +
 writerfilter/source/ooxml/factoryimpl_ns.py           |    4 +
 writerfilter/source/ooxml/model.xml                   |   19 +++++
 6 files changed, 112 insertions(+)

New commits:
commit 81ea6ed1f837545dd787c8855683b7ce04c265a8
Author: Mike Kaganski <mike.kaganski at collabora.com>
Date:   Thu Jul 13 09:08:56 2017 +0300

    tdf#111550: A workaround for out-of-order (in-paragraph) tbl on OOXML
    
    Word allows <w:tbl> to be a direct child of <w:p>, which is illegal
    according to ECMA-376-1:2016.
    
    This allows importing the data in such tables (previously, this text
    was simply dropped, causing data loss) - bug-to-bug compatibility
    with Word.
    
    Change-Id: I19c17ab19915ea46685727c635476fe5df593212
    Reviewed-on: https://gerrit.libreoffice.org/40909
    Tested-by: Jenkins <ci at libreoffice.org>
    Reviewed-by: Mike Kaganski <mike.kaganski at collabora.com>
    (cherry picked from commit 67a61e54531801645d51ad89aac30064b8c4b4e8)
    Reviewed-on: https://gerrit.libreoffice.org/40949
    Tested-by: Mike Kaganski <mike.kaganski at collabora.com>

diff --git a/sw/qa/extras/ooxmlimport/data/tdf111550.docx b/sw/qa/extras/ooxmlimport/data/tdf111550.docx
new file mode 100644
index 000000000000..6e13df351906
Binary files /dev/null and b/sw/qa/extras/ooxmlimport/data/tdf111550.docx differ
diff --git a/sw/qa/extras/ooxmlimport/ooxmlimport.cxx b/sw/qa/extras/ooxmlimport/ooxmlimport.cxx
index 942917f67eb8..dde5f4157910 100644
--- a/sw/qa/extras/ooxmlimport/ooxmlimport.cxx
+++ b/sw/qa/extras/ooxmlimport/ooxmlimport.cxx
@@ -1523,6 +1523,72 @@ DECLARE_OOXMLIMPORT_TEST(testGroupShapeFontName, "groupshape-fontname.docx")
     CPPUNIT_ASSERT_EQUAL(OUString(""), getProperty<OUString>(getRun(getParagraphOfText(1, xText), 1), "CharFontNameAsian"));
 }
 
+DECLARE_OOXMLIMPORT_TEST(testTdf111550, "tdf111550.docx")
+{
+    // The test document has the following ill-formed structure:
+    //
+    //    <w:tbl>
+    //        ...
+    //        <w:tr>
+    //            <w:tc>
+    //                <w:p>
+    //                    <w:r>
+    //                        <w:t>[outer:A2]</w:t>
+    //                        <w:br w:type="textWrapping"/>
+    //                    </w:r>
+    //                    <w:tbl>
+    //                        <w:tr>
+    //                            <w:tc>
+    //                                <w:p>
+    //                                    <w:r>
+    //                                        <w:t>[inner:A1]</w:t>
+    //                                    </w:r>
+    //                                </w:p>
+    //                            </w:tc>
+    //                        </w:tr>
+    //                    </w:tbl>
+    //                </w:p>
+    //            </w:tc>
+    //        </w:tr>
+    //    </w:tbl>
+    //
+    // i.e., a <w:tbl> as a direct child of <w:p> inside another table.
+    // Word accepts that illegal OOXML, and treats it as equivalent to
+    //
+    //    <w:tbl>
+    //        ...
+    //        <w:tr>
+    //            <w:tc>
+    //                <w:tbl>
+    //                    <w:tr>
+    //                        <w:tc>
+    //                            <w:p>
+    //                                <w:r>
+    //                                    <w:t>[outer:A2]</w:t>
+    //                                    <w:br w:type="textWrapping"/>
+    //                                </w:r>
+    //                                <w:r>
+    //                                    <w:t>[inner:A1]</w:t>
+    //                                </w:r>
+    //                            </w:p>
+    //                        </w:tc>
+    //                    </w:tr>
+    //                </w:tbl>
+    //            </w:tc>
+    //        </w:tr>
+    //    </w:tbl>
+    //
+    // i.e., it moves all contents of the outer paragraph into the inner table's first paragraph.
+
+    CPPUNIT_ASSERT_EQUAL(2, getParagraphs());
+    // Outer A1 is expected to be untouched; outer A2 to hold the inner table with the merged text.
+    uno::Reference<text::XTextContent> outerTable = getParagraphOrTable(1);
+    getCell(outerTable, "A1", "[outer:A1]");
+    uno::Reference<text::XText> cellA2(getCell(outerTable, "A2"), uno::UNO_QUERY_THROW);
+    uno::Reference<text::XTextContent> innerTable = getParagraphOrTable(1, cellA2);
+    getCell(innerTable, "A1", "[outer:A2]\n[inner:A1]");
+}
+
 // tests should only be added to ooxmlIMPORT *if* they fail round-tripping in ooxmlEXPORT
 
 CPPUNIT_PLUGIN_IMPLEMENT();
diff --git a/writerfilter/source/ooxml/OOXMLFastContextHandler.cxx b/writerfilter/source/ooxml/OOXMLFastContextHandler.cxx
index bb59ed9bebdc..3b39eaa1a954 100644
--- a/writerfilter/source/ooxml/OOXMLFastContextHandler.cxx
+++ b/writerfilter/source/ooxml/OOXMLFastContextHandler.cxx
@@ -1614,6 +1614,25 @@ void OOXMLFastContextHandlerTextTable::lcl_endFastElement
     mpParserState->endTable();
 }
 
+// tdf#111550
+void OOXMLFastContextHandlerTextTable::start_P_Tbl()
+{
+    // Normally, when one paragraph ends and another begins,
+    // in the OOXMLFactory_wml::endAction handler for <w:p>,
+    // pHandler->endOfParagraph() is called, which (among other things)
+    // calls TableManager::setHandle() to update the current cell's starting point.
+    // Then, in OOXMLFactory_wml::startAction for the next <w:p>,
+    // pHandler->startParagraphGroup() is called, which ends the previous group,
+    // and there, it pushes cells to the row in TableManager::endParagraphGroup()
+    // (cells have correct bounds defined by mCurHandle).
+    // When a table is a child of a <w:p>, that paragraph doesn't end before the nested
+    // paragraph begins. So, pHandler->endOfParagraph() was not (and should not be)
+    // called. But if the previous group is closed as the next paragraph starts, then
+    // cells will have wrong bounds. Here, we know that we *are* in a paragraph
+    // group, but it should not be finished, hence the flag is cleared here.
+    mpParserState->setInParagraphGroup(false);
+}
+
 /*
   class OOXMLFastContextHandlerShape
  */
diff --git a/writerfilter/source/ooxml/OOXMLFastContextHandler.hxx b/writerfilter/source/ooxml/OOXMLFastContextHandler.hxx
index eaf40e27f47b..62a4e00822df 100644
--- a/writerfilter/source/ooxml/OOXMLFastContextHandler.hxx
+++ b/writerfilter/source/ooxml/OOXMLFastContextHandler.hxx
@@ -433,6 +433,10 @@ public:
 
     virtual std::string getType() const override { return "TextTable"; }
 
+    // tdf#111550
+    // Start action for a <w:tbl> that is a direct child of <w:p> (CT_P_Tbl): the enclosing
+    // paragraph is merged with the table's first paragraph (that's what Word does in this case)
+    void start_P_Tbl();
 protected:
     virtual void lcl_startFastElement(Token_t Element, const css::uno::Reference< css::xml::sax::XFastAttributeList > & Attribs)
         throw (css::uno::RuntimeException, css::xml::sax::SAXException, std::exception) override;
diff --git a/writerfilter/source/ooxml/factoryimpl_ns.py b/writerfilter/source/ooxml/factoryimpl_ns.py
index 74ee6e8e3d25..0ac4d0e455ac 100644
--- a/writerfilter/source/ooxml/factoryimpl_ns.py
+++ b/writerfilter/source/ooxml/factoryimpl_ns.py
@@ -442,6 +442,10 @@ def factoryChooseAction(actionNode):
     elif actionNode.getAttribute("action") == "handleGridBefore" or actionNode.getAttribute("action") == "handleGridAfter":
         ret.append("    %sif (OOXMLFastContextHandlerTextTableRow* pTextTableRow = dynamic_cast<OOXMLFastContextHandlerTextTableRow*>(pHandler))" % extra_space)
         ret.append("    %s    pTextTableRow->%s();" % (extra_space, actionNode.getAttribute("action")))
+    # tdf#111550
+    elif actionNode.getAttribute("action") in ("start_P_Tbl"):
+        ret.append("    %sif (OOXMLFastContextHandlerTextTable* pTextTable = dynamic_cast<OOXMLFastContextHandlerTextTable*>(pHandler))" % extra_space)
+        ret.append("    %s    pTextTable->%s();" % (extra_space, actionNode.getAttribute("action")))
     elif actionNode.getAttribute("action") in ("sendProperty", "handleHyperlink"):
         ret.append("    %sif (OOXMLFastContextHandlerStream* pStream = dynamic_cast<OOXMLFastContextHandlerStream*>(pHandler))" % extra_space)
         ret.append("    %s    pStream->%s();" % (extra_space, actionNode.getAttribute("action")))
diff --git a/writerfilter/source/ooxml/model.xml b/writerfilter/source/ooxml/model.xml
index 98e80b8eff2b..4eb98b86c6bf 100644
--- a/writerfilter/source/ooxml/model.xml
+++ b/writerfilter/source/ooxml/model.xml
@@ -14191,6 +14191,10 @@
           <ref name="CT_Br_OutOfOrder"/>
         </element>
         <!-- end tdf#108714 -->
+        <!-- tdf#111550 : allow <w:tbl> at paragraph level (although this is illegal according to ECMA-376-1:2016) - bug-to-bug compatibility with Word -->
+        <element name="tbl">
+          <ref name="CT_P_Tbl"/>
+        </element>
       </define>
       <define name="ST_TblWidth">
         <choice>
@@ -14704,6 +14708,17 @@
         </element>
         <ref name="EG_ContentRowContent"/>
       </define>
+      <!-- tdf#111550 : Special element - a copy of the usual CT_Tbl, but only used as a direct child of CT_P -->
+      <define name="CT_P_Tbl">
+        <ref name="EG_RangeMarkupElements"/>
+        <element name="tblPr">
+          <ref name="CT_TblPr"/>
+        </element>
+        <element name="tblGrid">
+          <ref name="CT_TblGrid"/>
+        </element>
+        <ref name="EG_ContentRowContent"/>
+      </define>
       <define name="CT_TblLook">
         <attribute name="firstRow">
           <ref name="ST_OnOff"/>
@@ -18438,6 +18453,10 @@
         <element name="tblPrExChange" tokenid="ooxml:CT_TblPrEx_tblPrExChange"/>
     </resource>
     <resource name="CT_Tbl" resource="TextTable"/>
+    <!-- tdf#111550 : allow <w:tbl> at paragraph level (although this is illegal according to ECMA-376-1:2016) - bug-to-bug compatibility with Word -->
+    <resource name="CT_P_Tbl" resource="TextTable">
+      <action name="start" action="start_P_Tbl"/>
+    </resource>
     <resource name="CT_TblLook" resource="Properties">
       <attribute name="firstRow" tokenid="ooxml:CT_TblLook_firstRow"/>
       <attribute name="lastRow" tokenid="ooxml:CT_TblLook_lastRow"/>


More information about the Libreoffice-commits mailing list