[Libreoffice-commits] core.git: writerfilter/source
Michael Stahl (via logerrit)
logerrit at kemper.freedesktop.org
Wed Oct 30 13:45:37 UTC 2019
writerfilter/source/rtftok/rtfdocumentimpl.cxx | 25 +++++++++++++++++++++++++
1 file changed, 25 insertions(+)
New commits:
commit a6516c76c01b92f7d35bfb352b63af7de42b5707
Author: Michael Stahl <Michael.Stahl at cib.de>
AuthorDate: Tue Oct 29 15:54:41 2019 +0100
Commit: Michael Stahl <michael.stahl at cib.de>
CommitDate: Wed Oct 30 14:44:09 2019 +0100
writerfilter: rtftok: filter control characters
... in RTFDocumentImpl::checkUnicode(); see ooo86460-1.xls [sic]
for an example.
There is another caller of text() in rtfdispatchdestination.cxx:311, but
it turns out that the buffered text was created by text() in the first
place.
This shouldn't be a problem for DOCX, because XML 1.0 doesn't allow the
bad control characters anyway, so the SAX parser should report an error
in that case.
Change-Id: Ice45e1c3c8c7db668a4cfb8364e42addea1777ce
Reviewed-on: https://gerrit.libreoffice.org/81697
Reviewed-by: Michael Stahl <michael.stahl at cib.de>
Tested-by: Michael Stahl <michael.stahl at cib.de>
diff --git a/writerfilter/source/rtftok/rtfdocumentimpl.cxx b/writerfilter/source/rtftok/rtfdocumentimpl.cxx
index f2873ecf97b8..a2d0d1635e7c 100644
--- a/writerfilter/source/rtftok/rtfdocumentimpl.cxx
+++ b/writerfilter/source/rtftok/rtfdocumentimpl.cxx
@@ -30,6 +30,7 @@
#include <tools/datetimeutils.hxx>
#include <comphelper/classids.hxx>
#include <comphelper/embeddedobjectcontainer.hxx>
+#include <svl/lngmisc.hxx>
#include <sfx2/sfxbasemodel.hxx>
#include <sfx2/classificationhelper.hxx>
#include <oox/mathml/import.hxx>
@@ -3563,12 +3564,35 @@ bool RTFDocumentImpl::getSkipUnknown() { return m_bSkipUnknown; }
void RTFDocumentImpl::setSkipUnknown(bool bSkipUnknown) { m_bSkipUnknown = bSkipUnknown; }
+static auto FilterControlChars(Destination const destination, OUString const& rString) -> OUString
+{ // Returns rString with control characters removed, except that CR, LF and TAB are always kept.
+ if (destination == Destination::LEVELNUMBERS || destination == Destination::LEVELTEXT)
+ { // control characters are magic here! The string is returned unfiltered for these two destinations.
+ return rString;
+ }
+ OUStringBuffer buf(rString.getLength()); // constructed with the input length; the output is never longer
+ for (sal_Int32 i = 0; i < rString.getLength(); ++i)
+ {
+ sal_Unicode const ch(rString[i]);
+ if (!linguistic::IsControlChar(ch) || ch == '\r' || ch == '\n' || ch == '\t')
+ { // keep: not reported as a control character, or one of the explicitly allowed CR/LF/TAB
+ buf.append(ch);
+ }
+ else
+ { // drop the character; the only trace is this log entry (it does not say which character)
+ SAL_INFO("writerfilter.rtf", "filtering control character");
+ }
+ }
+ return buf.makeStringAndClear();
+}
+
void RTFDocumentImpl::checkUnicode(bool bUnicode, bool bHex)
{
if (bUnicode && !m_aUnicodeBuffer.isEmpty())
{
OUString aString = m_aUnicodeBuffer.toString();
m_aUnicodeBuffer.setLength(0);
+ aString = FilterControlChars(m_aStates.top().getDestination(), aString);
text(aString);
}
if (bHex && !m_aHexBuffer.isEmpty())
@@ -3579,6 +3603,7 @@ void RTFDocumentImpl::checkUnicode(bool bUnicode, bool bHex)
nEncoding = RTL_TEXTENCODING_MS_1252;
OUString aString = OStringToOUString(m_aHexBuffer.toString(), nEncoding);
m_aHexBuffer.setLength(0);
+ aString = FilterControlChars(m_aStates.top().getDestination(), aString);
text(aString);
}
}
More information about the Libreoffice-commits
mailing list