[Libreoffice-commits] core.git: Branch 'libreoffice-6-3' - writerfilter/source

Michael Stahl (via logerrit) logerrit at kemper.freedesktop.org
Mon Nov 4 08:28:59 UTC 2019


 writerfilter/source/rtftok/rtfdocumentimpl.cxx |   25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

New commits:
commit 612d252671d618008fd98536f9024e1b406231fc
Author:     Michael Stahl <Michael.Stahl at cib.de>
AuthorDate: Tue Oct 29 15:54:41 2019 +0100
Commit:     Miklos Vajna <vmiklos at collabora.com>
CommitDate: Mon Nov 4 09:28:24 2019 +0100

    writerfilter: rtftok: filter control characters
    
    ... in RTFDocumentImpl::checkUnicode(); see ooo86460-1.xls [sic]
    for an example.
    
    There is another caller of text() in rtfdispatchdestination.cxx:311 but
    it turns out that buffered text was created by text() in the first
    place.
    
    This shouldn't be a problem for DOCX because XML 1.0 doesn't allow the
    bad control characters anyway so the sax parser should report an error
    in that case.
    
    (cherry picked from commit a6516c76c01b92f7d35bfb352b63af7de42b5707)
    
    Change-Id: Ice45e1c3c8c7db668a4cfb8364e42addea1777ce
    Reviewed-on: https://gerrit.libreoffice.org/81780
    Tested-by: Jenkins
    Reviewed-by: Miklos Vajna <vmiklos at collabora.com>

diff --git a/writerfilter/source/rtftok/rtfdocumentimpl.cxx b/writerfilter/source/rtftok/rtfdocumentimpl.cxx
index c321d3c264b6..3a4e27b5b64d 100644
--- a/writerfilter/source/rtftok/rtfdocumentimpl.cxx
+++ b/writerfilter/source/rtftok/rtfdocumentimpl.cxx
@@ -29,6 +29,7 @@
 #include <tools/datetimeutils.hxx>
 #include <comphelper/classids.hxx>
 #include <comphelper/embeddedobjectcontainer.hxx>
+#include <svl/lngmisc.hxx>
 #include <sfx2/sfxbasemodel.hxx>
 #include <sfx2/classificationhelper.hxx>
 #include <oox/mathml/import.hxx>
@@ -3569,11 +3570,34 @@ bool RTFDocumentImpl::getSkipUnknown() { return m_bSkipUnknown; }
 
 void RTFDocumentImpl::setSkipUnknown(bool bSkipUnknown) { m_bSkipUnknown = bSkipUnknown; }
 
+static auto FilterControlChars(Destination const destination, OUString const& rString) -> OUString // returns rString with control characters removed; CR, LF and TAB are kept
+{
+    if (destination == Destination::LEVELNUMBERS || destination == Destination::LEVELTEXT)
+    { // these two destinations get the string back unfiltered: control characters carry meaning there (presumably list-level placeholders/positions - confirm against the RTF spec)
+        return rString;
+    }
+    OUStringBuffer buf(rString.getLength()); // constructed with the input length, which is the upper bound of the filtered result
+    for (sal_Int32 i = 0; i < rString.getLength(); ++i)
+    {
+        sal_Unicode const ch(rString[i]);
+        if (!linguistic::IsControlChar(ch) || ch == '\r' || ch == '\n' || ch == '\t') // keep everything that is not a control character, plus the CR/LF/TAB exceptions
+        {
+            buf.append(ch);
+        }
+        else
+        {
+            SAL_INFO("writerfilter.rtf", "filtering control character"); // the character is dropped from the result; only a log entry records it
+        }
+    }
+    return buf.makeStringAndClear();
+}
+
 void RTFDocumentImpl::checkUnicode(bool bUnicode, bool bHex)
 {
     if (bUnicode && !m_aUnicodeBuffer.isEmpty())
     {
         OUString aString = m_aUnicodeBuffer.makeStringAndClear();
+        aString = FilterControlChars(m_aStates.top().getDestination(), aString);
         text(aString);
     }
     if (bHex && !m_aHexBuffer.isEmpty())
@@ -3583,6 +3607,7 @@ void RTFDocumentImpl::checkUnicode(bool bUnicode, bool bHex)
             && m_aStates.top().getCurrentEncoding() == RTL_TEXTENCODING_SYMBOL)
             nEncoding = RTL_TEXTENCODING_MS_1252;
         OUString aString = OStringToOUString(m_aHexBuffer.makeStringAndClear(), nEncoding);
+        aString = FilterControlChars(m_aStates.top().getDestination(), aString);
         text(aString);
     }
 }


More information about the Libreoffice-commits mailing list