[Libreoffice-commits] core.git: sw/qa writerfilter/source

László Németh (via logerrit) logerrit at kemper.freedesktop.org
Fri Feb 12 17:28:13 UTC 2021


 dev/null                                              |binary
 sw/qa/extras/ooxmlexport/ooxmlexport15.cxx            |    6 -
 writerfilter/source/dmapper/DomainMapper.cxx          |   13 ++
 writerfilter/source/dmapper/DomainMapper_Impl.cxx     |  100 +++++++++++++++++-
 writerfilter/source/dmapper/DomainMapper_Impl.hxx     |    7 +
 writerfilter/source/ooxml/OOXMLDocumentImpl.cxx       |    7 -
 writerfilter/source/ooxml/OOXMLDocumentImpl.hxx       |    1 
 writerfilter/source/ooxml/OOXMLFastContextHandler.cxx |   24 +++-
 writerfilter/source/ooxml/OOXMLParserState.cxx        |    8 +
 writerfilter/source/ooxml/OOXMLParserState.hxx        |    4 
 10 files changed, 153 insertions(+), 17 deletions(-)

New commits:
commit 9b39ce0e66acfe812e1d50e530dc2ccdef3e1357
Author:     László Németh <nemeth at numbertext.org>
AuthorDate: Wed Feb 10 00:12:52 2021 +0100
Commit:     László Németh <nemeth at numbertext.org>
CommitDate: Fri Feb 12 18:27:31 2021 +0100

    tdf#76260 DOCX import: fix slow footnote import
    
    by parsing footnotes.xml only once instead
    of parsing again and again for every footnotes.
    This was a serious performance problem for
    documents with hundreds of footnotes, where the
    footnote import took minutes instead of seconds.
    
    Note: switch off CHECK_NOTMERGED in a debug build
    to measure realistic speed-up, e.g. with the
    reported document: 2 min 36 sec -> 22 sec, and
    not 1 min 40 sec, as with CHECK_NOTMERGED.
    
    Revert commit 60dbe21f59a45889c433727d0862c9a4274d94d2
    ("tdf#88126: sw_ooxmlexport15: Add unittest"), because
    it was a fragile table layout test, worked only by accident.
    
    Change-Id: I9460442cf0c30f2bc5ff393c947a008ca7bba6df
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/110811
    Tested-by: Jenkins
    Reviewed-by: László Németh <nemeth at numbertext.org>

diff --git a/sw/qa/extras/ooxmlexport/data/tdf88126.docx b/sw/qa/extras/ooxmlexport/data/tdf88126.docx
deleted file mode 100644
index 9d4d2d5d0f8c..000000000000
Binary files a/sw/qa/extras/ooxmlexport/data/tdf88126.docx and /dev/null differ
diff --git a/sw/qa/extras/ooxmlexport/ooxmlexport15.cxx b/sw/qa/extras/ooxmlexport/ooxmlexport15.cxx
index 574b925a07aa..31426770648a 100644
--- a/sw/qa/extras/ooxmlexport/ooxmlexport15.cxx
+++ b/sw/qa/extras/ooxmlexport/ooxmlexport15.cxx
@@ -130,12 +130,6 @@ DECLARE_OOXMLEXPORT_TEST(testTdf137850_compat15ZOrder, "tdf137850_compat15ZOrder
     CPPUNIT_ASSERT_EQUAL_MESSAGE("Textbox is in the foreground", true, getProperty<bool>(xShape, "Opaque"));
 }
 
-DECLARE_OOXMLEXPORT_TEST(testTdf88126, "tdf88126.docx")
-{
-    // Without the fix in place, this test would have hung
-    CPPUNIT_ASSERT_EQUAL(11, getPages());
-}
-
 DECLARE_OOXMLEXPORT_EXPORTONLY_TEST(testTdf118701, "tdf118701.docx")
 {
     // This was 6, related to moving inline images after the page breaks
diff --git a/writerfilter/source/dmapper/DomainMapper.cxx b/writerfilter/source/dmapper/DomainMapper.cxx
index 9ea70164c96e..4f58670aefb5 100644
--- a/writerfilter/source/dmapper/DomainMapper.cxx
+++ b/writerfilter/source/dmapper/DomainMapper.cxx
@@ -2276,7 +2276,7 @@ void DomainMapper::sprmWithProps( Sprm& rSprm, const PropertyMapPtr& rContext )
     case NS_ooxml::LN_EG_RPrBase_rStyle:
         {
             OUString sConvertedName( m_pImpl->GetStyleSheetTable()->ConvertStyleName( sStringValue, true ) );
-            if (m_pImpl->CheckFootnoteStyle())
+            if (m_pImpl->CheckFootnoteStyle() && m_pImpl->GetFootnoteContext())
                 m_pImpl->SetHasFootnoteStyle(m_pImpl->GetFootnoteContext()->GetFootnoteStyle() == sConvertedName);
 
             // First check if the style exists in the document.
@@ -3370,6 +3370,17 @@ void DomainMapper::lcl_utext(const sal_uInt8 * data_, size_t len)
 
     if (len == 1)
     {
+        // preload all footnotes in separated footnotes
+        if (sText[0] == 0x5)
+        {
+            if (m_pImpl->GetFootnoteCount() > -1)
+            {
+                m_pImpl->PopFootOrEndnote(/*bIsFootnote=*/true);
+                m_pImpl->PushFootOrEndnote(/*bIsFootnote=*/true);
+            }
+            m_pImpl->IncrementFootnoteCount();
+        }
+
         // If the footnote contains a Footnote Reference Mark, it can't be a custom footnote
         // ******
         // This code block is wrong, as it should also be in m_pImpl->IsInFootOrEndnote().
diff --git a/writerfilter/source/dmapper/DomainMapper_Impl.cxx b/writerfilter/source/dmapper/DomainMapper_Impl.cxx
index c3cbbcb2a2b9..ebfe9d625bd2 100644
--- a/writerfilter/source/dmapper/DomainMapper_Impl.cxx
+++ b/writerfilter/source/dmapper/DomainMapper_Impl.cxx
@@ -45,6 +45,8 @@
 #include <com/sun/star/text/XDocumentIndex.hpp>
 #include <com/sun/star/text/XDocumentIndexesSupplier.hpp>
 #include <com/sun/star/text/XFootnote.hpp>
+#include <com/sun/star/text/XEndnotesSupplier.hpp>
+#include <com/sun/star/text/XFootnotesSupplier.hpp>
 #include <com/sun/star/text/XLineNumberingProperties.hpp>
 #include <com/sun/star/style/XStyle.hpp>
 #include <com/sun/star/text/PageNumberType.hpp>
@@ -305,6 +307,7 @@ DomainMapper_Impl::DomainMapper_Impl(
         m_bHasFootnoteStyle(false),
         m_bCheckFootnoteStyle(false),
         m_eSkipFootnoteState(SkipFootnoteSeparator::OFF),
+        m_nFootnotes(-1),
         m_bLineNumberingSet( false ),
         m_bIsInFootnoteProperties( false ),
         m_bIsParaMarkerChange( false ),
@@ -2700,6 +2703,7 @@ void DomainMapper_Impl::CreateRedline(uno::Reference<text::XTextRange> const& xR
         pRedlineProperties[1].Value <<= ConversionHelper::ConvertDateStringToDateTime( pRedline->m_sDate );
         pRedlineProperties[2].Name = getPropertyName( PROP_REDLINE_REVERT_PROPERTIES );
         pRedlineProperties[2].Value <<= pRedline->m_aRevertProperties;
+        // TODO: add !IsInFootOrEndnote(), if loading of endnotes is optimized
         if (!m_bIsActualParagraphFramed)
         {
             uno::Reference < text::XRedline > xRedline( xRange, uno::UNO_QUERY_THROW );
@@ -2712,6 +2716,12 @@ void DomainMapper_Impl::CreateRedline(uno::Reference<text::XTextRange> const& xR
             aFramedRedlines.push_back( uno::makeAny(sType) );
             aFramedRedlines.push_back( uno::makeAny(aRedlineProperties) );
         }
+        else if (IsInFootOrEndnote())
+        {
+            aFootnoteRedlines.push_back( uno::makeAny(xRange) );
+            aFootnoteRedlines.push_back( uno::makeAny(sType) );
+            aFootnoteRedlines.push_back( uno::makeAny(aRedlineProperties) );
+        }
     }
     catch( const uno::Exception & )
     {
@@ -2823,8 +2833,92 @@ void DomainMapper_Impl::PushAnnotation()
 }
 
 
-void DomainMapper_Impl::PopFootOrEndnote()
+void DomainMapper_Impl::PopFootOrEndnote( bool bIsFootnote )
 {
+    // content of the footnotes were inserted after the first footnote in temporary footnotes,
+    // restore the content of the actual footnote by copying its content from the first
+    // (remaining) temporary footnote and remove the temporary footnote.
+    // FIXME: add footnote IDs to handle possible differences in footnote serialization
+    uno::Reference< text::XFootnotesSupplier> xFootnotesSupplier( GetTextDocument(), uno::UNO_QUERY );
+    if ( bIsFootnote && GetFootnoteCount() > -1 && xFootnotesSupplier.is() )
+    {
+        auto xFootnotes = xFootnotesSupplier->getFootnotes();
+        uno::Reference< text::XFootnote > xFootnoteFirst, xFootnoteLast;
+        xFootnotes->getByIndex(xFootnotes->getCount()-1) >>= xFootnoteLast;
+        if ( xFootnotes->getCount() > 1 && xFootnoteLast->getLabel().isEmpty() )
+        {
+            // copy content of the first remaining temporary footnote
+            xFootnotes->getByIndex(1) >>= xFootnoteFirst;
+            uno::Reference< text::XText > xSrc( xFootnoteFirst, uno::UNO_QUERY_THROW );
+            uno::Reference< text::XText > xDest( xFootnoteLast, uno::UNO_QUERY_THROW );
+            uno::Reference< text::XTextCopy > xTxt, xTxt2;
+            xTxt.set(  xSrc, uno::UNO_QUERY_THROW );
+            xTxt2.set( xDest, uno::UNO_QUERY_THROW );
+            xTxt2->copyText( xTxt );
+
+            // copy its redlines
+            std::vector<sal_Int32> redPos, redLen;
+            sal_Int32 nFirstRedline = -1;
+            for( size_t i = 0; i < aFootnoteRedlines.size(); i+=3)
+            {
+                uno::Reference< text::XTextRange > xRange;
+                aFootnoteRedlines[i] >>= xRange;
+
+                // is this a redline of the temporary footnote?
+                uno::Reference<text::XTextCursor> xRangeCursor;
+                try
+                {
+                    xRangeCursor = xSrc->createTextCursorByRange( xRange );
+                }
+                catch( const uno::Exception& )
+                {
+                }
+                if (xRangeCursor.is())
+                {
+                    nFirstRedline = i;
+                    sal_Int32 nLen = xRange->getString().getLength();
+                    redLen.push_back(nLen);
+                    xRangeCursor->gotoRange(xSrc->getStart(), true);
+                    redPos.push_back(xRangeCursor->getString().getLength() - nLen);
+                }
+                else
+                {
+                    // we have already found all redlines of the footnote,
+                    // skip checking the redlines of the other footnotes
+                    if (nFirstRedline > -1)
+                        break;
+                    // failed createTextCursorByRange(), for example, table inside the frame
+                    redLen.push_back(-1);
+                    redPos.push_back(-1);
+                }
+            }
+
+            // create redlines in the copied footnote
+            for( size_t i = 0; nFirstRedline > -1 && i <= sal::static_int_cast<size_t>(nFirstRedline); i+=3)
+            {
+                OUString sType;
+                beans::PropertyValues aRedlineProperties( 3 );
+                // skip failed createTextCursorByRange()
+                if (redPos[i/3] == -1)
+                    continue;
+                aFootnoteRedlines[i+1] >>= sType;
+                aFootnoteRedlines[i+2] >>= aRedlineProperties;
+                uno::Reference< text::XTextCursor > xCrsr = xDest->getText()->createTextCursor();
+                xCrsr->goRight(redPos[i/3], false);
+                xCrsr->goRight(redLen[i/3], true);
+                uno::Reference < text::XRedline > xRedline( xCrsr, uno::UNO_QUERY_THROW );
+                xRedline->makeRedline( sType, aRedlineProperties );
+            }
+
+            // remove processed redlines
+            for( size_t i = 0; nFirstRedline > -1 && i <= sal::static_int_cast<size_t>(nFirstRedline) + 2; i++)
+                aFootnoteRedlines.pop_front();
+
+            // remove temporary footnote
+            xFootnoteFirst->getAnchor()->setString("");
+        }
+    }
+
     if (!IsRTFImport())
         RemoveLastParagraph();
 
@@ -7135,6 +7229,8 @@ void DomainMapper_Impl::RemoveTopRedline( )
 {
     if (m_aRedlines.top().empty())
     {
+        if (GetFootnoteCount() > -1)
+            return;
         SAL_WARN("writerfilter.dmapper", "RemoveTopRedline called with empty stack");
         throw uno::Exception("RemoveTopRedline failed", nullptr);
     }
@@ -7514,7 +7610,7 @@ void DomainMapper_Impl::substream(Id rName,
     break;
     case NS_ooxml::LN_footnote:
     case NS_ooxml::LN_endnote:
-        PopFootOrEndnote();
+        PopFootOrEndnote( NS_ooxml::LN_footnote == rName );
     break;
     case NS_ooxml::LN_annotation :
         PopAnnotation();
diff --git a/writerfilter/source/dmapper/DomainMapper_Impl.hxx b/writerfilter/source/dmapper/DomainMapper_Impl.hxx
index ab29d0a3a185..8441b4012152 100644
--- a/writerfilter/source/dmapper/DomainMapper_Impl.hxx
+++ b/writerfilter/source/dmapper/DomainMapper_Impl.hxx
@@ -525,6 +525,8 @@ private:
     bool m_bCheckFootnoteStyle;
     /// Skip paragraphs from the <w:separator/> footnote
     SkipFootnoteSeparator           m_eSkipFootnoteState;
+    /// preload footnotes
+    sal_Int32                       m_nFootnotes;
 
     bool                            m_bLineNumberingSet;
     bool                            m_bIsInFootnoteProperties;
@@ -790,7 +792,7 @@ public:
     bool IsInTOC() const { return m_bStartTOC; }
 
     void PushFootOrEndnote( bool bIsFootnote );
-    void PopFootOrEndnote();
+    void PopFootOrEndnote( bool bIsFootnote );
     bool IsInFootOrEndnote() const { return m_bInFootOrEndnote; }
 
     void StartCustomFootnote(const PropertyMapPtr pContext);
@@ -804,6 +806,8 @@ public:
 
     SkipFootnoteSeparator GetSkipFootnoteState() const { return m_eSkipFootnoteState; }
     void SetSkipFootnoteState(SkipFootnoteSeparator eId) { m_eSkipFootnoteState =  eId; }
+    sal_Int32 GetFootnoteCount() const { return m_nFootnotes; }
+    void IncrementFootnoteCount() { ++m_nFootnotes; }
 
     void PushAnnotation();
     void PopAnnotation();
@@ -1091,6 +1095,7 @@ public:
     /// store their data, and create them after frame creation
     bool m_bIsActualParagraphFramed;
     std::vector<css::uno::Any> aFramedRedlines;
+    std::deque<css::uno::Any> aFootnoteRedlines;
 
     bool IsParaWithInlineObject() const { return m_bParaWithInlineObject; }
 
diff --git a/writerfilter/source/ooxml/OOXMLDocumentImpl.cxx b/writerfilter/source/ooxml/OOXMLDocumentImpl.cxx
index 813b9cbc75fb..938d977d3348 100644
--- a/writerfilter/source/ooxml/OOXMLDocumentImpl.cxx
+++ b/writerfilter/source/ooxml/OOXMLDocumentImpl.cxx
@@ -52,6 +52,7 @@ namespace writerfilter::ooxml
 OOXMLDocumentImpl::OOXMLDocumentImpl(OOXMLStream::Pointer_t const & pStream, const uno::Reference<task::XStatusIndicator>& xStatusIndicator, bool bSkipImages, const uno::Sequence<beans::PropertyValue>& rDescriptor)
     : mpStream(pStream)
     , mxStatusIndicator(xStatusIndicator)
+    , mpXNoteStream()
     , mnXNoteId(0)
     , mbIsSubstream(false)
     , mbSkipImages(bSkipImages)
@@ -270,8 +271,8 @@ void OOXMLDocumentImpl::resolveFootnote(Stream & rStream,
                                         Id aType,
                                         const sal_Int32 nNoteId)
 {
-    writerfilter::Reference<Stream>::Pointer_t pStream =
-        getXNoteStream(OOXMLStream::FOOTNOTES, nNoteId);
+    if (!mpXNoteStream)
+        mpXNoteStream = getXNoteStream(OOXMLStream::FOOTNOTES, nNoteId);
 
     Id nId;
     switch (aType)
@@ -285,7 +286,7 @@ void OOXMLDocumentImpl::resolveFootnote(Stream & rStream,
         break;
     }
 
-    resolveFastSubStreamWithId(rStream, pStream, nId);
+    resolveFastSubStreamWithId(rStream, mpXNoteStream, nId);
 }
 
 void OOXMLDocumentImpl::resolveEndnote(Stream & rStream,
diff --git a/writerfilter/source/ooxml/OOXMLDocumentImpl.hxx b/writerfilter/source/ooxml/OOXMLDocumentImpl.hxx
index 1848f8971766..35a77989605b 100644
--- a/writerfilter/source/ooxml/OOXMLDocumentImpl.hxx
+++ b/writerfilter/source/ooxml/OOXMLDocumentImpl.hxx
@@ -36,6 +36,7 @@ class OOXMLDocumentImpl : public OOXMLDocument
 {
     OOXMLStream::Pointer_t mpStream;
     css::uno::Reference<css::task::XStatusIndicator> mxStatusIndicator;
+    writerfilter::Reference<Stream>::Pointer_t mpXNoteStream;
     sal_Int32 mnXNoteId;
 
     css::uno::Reference<css::frame::XModel> mxModel;
diff --git a/writerfilter/source/ooxml/OOXMLFastContextHandler.cxx b/writerfilter/source/ooxml/OOXMLFastContextHandler.cxx
index a115badfe72a..fe02597775bd 100644
--- a/writerfilter/source/ooxml/OOXMLFastContextHandler.cxx
+++ b/writerfilter/source/ooxml/OOXMLFastContextHandler.cxx
@@ -41,6 +41,7 @@
 const sal_Unicode uCR = 0xd;
 const sal_Unicode uFtnEdnRef = 0x2;
 const sal_Unicode uFtnEdnSep = 0x3;
+const sal_Unicode uFtnSep = 0x5;
 const sal_Unicode uTab = 0x9;
 const sal_Unicode uPgNum = 0x0;
 const sal_Unicode uNoBreakHyphen = 0x2011;
@@ -165,12 +166,22 @@ void SAL_CALL OOXMLFastContextHandler::startFastElement
         mbPreserveSpace = Attribs->getValue(oox::NMSP_xml | oox::XML_space) == "preserve";
         mbPreserveSpaceSet = true;
     }
-    if (Element == (NMSP_officeMath | XML_oMathPara))
+    if (Element == W_TOKEN(footnote))
+    {
+        // send uFtnSep to sign new footnote content, but skip footnote separators
+        if (!Attribs->hasAttribute(W_TOKEN(type)) ||
+                ( Attribs->getValue(W_TOKEN(type)) != "separator" &&
+                  Attribs->getValue(W_TOKEN(type)) != "continuationSeparator" ))
+        {
+            mpParserState->setStartFootnote(true);
+        }
+    }
+    else if (Element == (NMSP_officeMath | XML_oMathPara))
     {
         mnMathJcVal = eMathParaJc::CENTER;
         mbIsMathPara = true;
     }
-    if (Element == (NMSP_officeMath | XML_jc) && mpParent && mpParent->mpParent )
+    else if (Element == (NMSP_officeMath | XML_jc) && mpParent && mpParent->mpParent )
     {
         mbIsMathPara = true;
         auto aAttrLst = Attribs->getFastAttributes();
@@ -370,6 +381,11 @@ void OOXMLFastContextHandler::startCharacterGroup()
         mpStream->startCharacterGroup();
         mpParserState->setInCharacterGroup(true);
         mpParserState->resolveCharacterProperties(*mpStream);
+        if (mpParserState->isStartFootnote())
+        {
+            mpStream->utext(reinterpret_cast<const sal_uInt8*>(&uFtnSep), 1);
+            mpParserState->setStartFootnote(false);
+        }
     }
 
     // tdf#108714 : if we have a postponed break information,
@@ -1340,7 +1356,9 @@ void OOXMLFastContextHandlerXNote::lcl_startFastElement
     mbForwardEventsSaved = isForwardEvents();
 
     // If this is the note we're looking for or this is the footnote separator one.
-    if (mnMyXNoteId == getXNoteId() || static_cast<sal_uInt32>(mnMyXNoteType) == NS_ooxml::LN_Value_doc_ST_FtnEdn_separator)
+    if (mnMyXNoteId == getXNoteId() ||
+            static_cast<sal_uInt32>(mnMyXNoteType) == NS_ooxml::LN_Value_doc_ST_FtnEdn_separator ||
+            mpParserState->isStartFootnote())
         setForwardEvents(true);
     else
         setForwardEvents(false);
diff --git a/writerfilter/source/ooxml/OOXMLParserState.cxx b/writerfilter/source/ooxml/OOXMLParserState.cxx
index c103566d5331..3d35cbb9deac 100644
--- a/writerfilter/source/ooxml/OOXMLParserState.cxx
+++ b/writerfilter/source/ooxml/OOXMLParserState.cxx
@@ -40,7 +40,8 @@ OOXMLParserState::OOXMLParserState() :
     inTxbxContent(false),
     savedInParagraphGroup(false),
     savedInCharacterGroup(false),
-    savedLastParagraphInSection(false)
+    savedLastParagraphInSection(false),
+    mbStartFootnote(false)
 {
 }
 
@@ -275,6 +276,11 @@ void OOXMLParserState::endTxbxContent()
     inTxbxContent = false;
 }
 
+void OOXMLParserState::setStartFootnote(bool bStartFootnote)
+{
+    mbStartFootnote = bStartFootnote;
+}
+
 }
 
 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/writerfilter/source/ooxml/OOXMLParserState.hxx b/writerfilter/source/ooxml/OOXMLParserState.hxx
index 422ba0b776ef..7d094dbd42b7 100644
--- a/writerfilter/source/ooxml/OOXMLParserState.hxx
+++ b/writerfilter/source/ooxml/OOXMLParserState.hxx
@@ -58,6 +58,7 @@ class OOXMLParserState final : public virtual SvRefBase
     bool savedLastParagraphInSection;
     std::vector<SavedAlternateState> maSavedAlternateStates;
     std::vector<OOXMLPropertySet::Pointer_t> mvPostponedBreaks;
+    bool mbStartFootnote;
 
 public:
     typedef tools::SvRef<OOXMLParserState> Pointer_t;
@@ -112,6 +113,9 @@ public:
 
     void startTxbxContent();
     void endTxbxContent();
+
+    void setStartFootnote(bool bStartFootnote);
+    bool isStartFootnote() const { return mbStartFootnote; }
 };
 }
 


More information about the Libreoffice-commits mailing list