[Libreoffice-commits] core.git: Branch 'libreoffice-7-1' - writerfilter/CppunitTest_writerfilter_rtftok.mk writerfilter/qa writerfilter/source
Miklos Vajna (via logerrit)
logerrit at kemper.freedesktop.org
Tue Jun 1 08:43:35 UTC 2021
writerfilter/CppunitTest_writerfilter_rtftok.mk | 1
writerfilter/qa/cppunittests/rtftok/data/invalid-hex.rtf | 3
writerfilter/qa/cppunittests/rtftok/rtftokenizer.cxx | 63 +++++++++++++++
writerfilter/source/rtftok/rtftokenizer.cxx | 15 ++-
4 files changed, 77 insertions(+), 5 deletions(-)
New commits:
commit 87c307b6fc1eb86aa194832e9a293df435ed3f87
Author: Miklos Vajna <vmiklos at collabora.com>
AuthorDate: Mon May 31 21:12:12 2021 +0200
Commit: Xisco Fauli <xiscofauli at libreoffice.org>
CommitDate: Tue Jun 1 10:42:59 2021 +0200
tdf#142325 RTF import: tolerate invalid hex markup like "\'3?"
The RTF spec says \'hh is the expected form, where each "h" is 0-9, a-f
or A-F. But Word accepts the bugdoc, so don't reject this input; instead,
handle \'<number><junk> as \'0<number>.
In the current case, at least, the actual value is ignored, since it is the
single fallback character that follows \uN to provide a non-Unicode value
for old readers that don't support Unicode.
Change-Id: Ib61247ab08278ca5012cc887cee26c7571c29fc6
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/116499
Tested-by: Jenkins
Reviewed-by: Miklos Vajna <vmiklos at collabora.com>
(cherry picked from commit 6fc8a6b0b52509d735971f079d7b1660559d475d)
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/116457
Reviewed-by: Xisco Fauli <xiscofauli at libreoffice.org>
diff --git a/writerfilter/CppunitTest_writerfilter_rtftok.mk b/writerfilter/CppunitTest_writerfilter_rtftok.mk
index db038292ebdd..07271b777ae0 100644
--- a/writerfilter/CppunitTest_writerfilter_rtftok.mk
+++ b/writerfilter/CppunitTest_writerfilter_rtftok.mk
@@ -18,6 +18,7 @@ $(eval $(call gb_CppunitTest_use_externals,writerfilter_rtftok,\
$(eval $(call gb_CppunitTest_add_exception_objects,writerfilter_rtftok, \
writerfilter/qa/cppunittests/rtftok/rtfsdrimport \
writerfilter/qa/cppunittests/rtftok/rtfsprm \
+ writerfilter/qa/cppunittests/rtftok/rtftokenizer \
))
$(eval $(call gb_CppunitTest_use_libraries,writerfilter_rtftok, \
diff --git a/writerfilter/qa/cppunittests/rtftok/data/invalid-hex.rtf b/writerfilter/qa/cppunittests/rtftok/data/invalid-hex.rtf
new file mode 100644
index 000000000000..8f9224c0e905
--- /dev/null
+++ b/writerfilter/qa/cppunittests/rtftok/data/invalid-hex.rtf
@@ -0,0 +1,3 @@
+{\rtf1
+x\u345\'3?x
+\par}
diff --git a/writerfilter/qa/cppunittests/rtftok/rtftokenizer.cxx b/writerfilter/qa/cppunittests/rtftok/rtftokenizer.cxx
new file mode 100644
index 000000000000..530e9bb7245a
--- /dev/null
+++ b/writerfilter/qa/cppunittests/rtftok/rtftokenizer.cxx
@@ -0,0 +1,63 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <test/bootstrapfixture.hxx>
+#include <unotest/macros_test.hxx>
+
+#include <com/sun/star/frame/Desktop.hpp>
+#include <com/sun/star/text/XTextDocument.hpp>
+
+using namespace ::com::sun::star;
+
+namespace
+{
+/// Tests for writerfilter/source/rtftok/rtftokenizer.cxx.
+class Test : public test::BootstrapFixture, public unotest::MacrosTest
+{
+private:
+ uno::Reference<lang::XComponent> mxComponent;
+
+public:
+ void setUp() override;
+ void tearDown() override;
+ uno::Reference<lang::XComponent>& getComponent() { return mxComponent; }
+};
+
+void Test::setUp()
+{
+ test::BootstrapFixture::setUp();
+
+ mxDesktop.set(frame::Desktop::create(mxComponentContext));
+}
+
+void Test::tearDown()
+{
+ if (mxComponent.is())
+ mxComponent->dispose();
+
+ test::BootstrapFixture::tearDown();
+}
+
+constexpr OUStringLiteral DATA_DIRECTORY = u"/writerfilter/qa/cppunittests/rtftok/data/";
+
+CPPUNIT_TEST_FIXTURE(Test, testInvalidHex)
+{
+ // Given a document with a markup like "\'3?":
+ OUString aURL = m_directories.getURLFromSrc(DATA_DIRECTORY) + "invalid-hex.rtf";
+
+ // When loading that document:
+ getComponent() = loadFromDesktop(aURL);
+
+ // Then make sure the result matches Word, rather than just refusing to import the document:
+ uno::Reference<text::XTextDocument> xTextDocument(getComponent(), uno::UNO_QUERY);
+ CPPUNIT_ASSERT_EQUAL(OUString::fromUtf8("xřx"), xTextDocument->getText()->getString());
+}
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/writerfilter/source/rtftok/rtftokenizer.cxx b/writerfilter/source/rtftok/rtftokenizer.cxx
index bfd29c38f118..b205227b94e3 100644
--- a/writerfilter/source/rtftok/rtftokenizer.cxx
+++ b/writerfilter/source/rtftok/rtftokenizer.cxx
@@ -144,11 +144,16 @@ RTFError RTFTokenizer::resolveParse()
else
{
SAL_INFO("writerfilter.rtf", __func__ << ": hex internal state");
- b = b << 4;
- sal_Int8 parsed = msfilter::rtfutil::AsHex(ch);
- if (parsed == -1)
- return RTFError::HEX_INVALID;
- b += parsed;
+ // Assume that \'<number><junk> means \'0<number>.
+ if (rtl::isAsciiDigit(static_cast<unsigned char>(ch))
+ || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F'))
+ {
+ b = b << 4;
+ sal_Int8 parsed = msfilter::rtfutil::AsHex(ch);
+ if (parsed == -1)
+ return RTFError::HEX_INVALID;
+ b += parsed;
+ }
count--;
if (!count)
{
More information about the Libreoffice-commits
mailing list