[Libreoffice-commits] core.git: 2 commits - filter/source svtools/CppunitTest_svtools_html.mk svtools/qa svtools/source

Miklos Vajna vmiklos at collabora.co.uk
Thu Dec 14 08:14:17 UTC 2017


 filter/source/config/fragments/types/generic_HTML.xcu |    2 
 svtools/CppunitTest_svtools_html.mk                   |    1 
 svtools/qa/unit/testHtmlReader.cxx                    |   70 ++++++++++++++++++
 svtools/source/svhtml/parhtml.cxx                     |    3 
 4 files changed, 74 insertions(+), 2 deletions(-)

New commits:
commit 3fe64261b5658e28e2c0a1630cf878f066f77f0c
Author: Miklos Vajna <vmiklos at collabora.co.uk>
Date:   Wed Dec 13 14:46:26 2017 +0100

    Related: tdf#114428 svtools HTML import: avoid XML declaration in body text
    
    Just ignore it for now.
    
    Change-Id: Idf82af611370d957c6704cce250941a8a0b90637
    Reviewed-on: https://gerrit.libreoffice.org/46388
    Tested-by: Jenkins <ci at libreoffice.org>
    Reviewed-by: Miklos Vajna <vmiklos at collabora.co.uk>

diff --git a/svtools/CppunitTest_svtools_html.mk b/svtools/CppunitTest_svtools_html.mk
index e3e56e4d9949..6fbca2c06442 100644
--- a/svtools/CppunitTest_svtools_html.mk
+++ b/svtools/CppunitTest_svtools_html.mk
@@ -14,6 +14,7 @@ $(eval $(call gb_CppunitTest_use_external,svtools_html,boost_headers))
 $(eval $(call gb_CppunitTest_use_sdk_api,svtools_html))
 
 $(eval $(call gb_CppunitTest_add_exception_objects,svtools_html, \
+    svtools/qa/unit/testHtmlReader \
     svtools/qa/unit/testHtmlWriter \
 ))
 
diff --git a/svtools/qa/unit/testHtmlReader.cxx b/svtools/qa/unit/testHtmlReader.cxx
new file mode 100644
index 000000000000..151976eabc9d
--- /dev/null
+++ b/svtools/qa/unit/testHtmlReader.cxx
@@ -0,0 +1,70 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ */
+
+#include <cppunit/TestFixture.h>
+#include <cppunit/extensions/HelperMacros.h>
+#include <com/sun/star/document/XDocumentProperties.hpp>
+#include <svtools/parhtml.hxx>
+#include <tools/ref.hxx>
+#include <tools/stream.hxx>
+
+namespace
+{
+/// Subclass of HTMLParser that can sense the import result.
+class TestHTMLParser : public HTMLParser
+{
+public:
+    TestHTMLParser(SvStream& rStream);
+    virtual void NextToken(HtmlTokenId nToken) override;
+
+    OUString m_aDocument;
+};
+
+TestHTMLParser::TestHTMLParser(SvStream& rStream)
+    : HTMLParser(rStream)
+{
+}
+
+void TestHTMLParser::NextToken(HtmlTokenId nToken)
+{
+    if (nToken == HtmlTokenId::TEXTTOKEN)
+        m_aDocument += aToken;
+}
+
+/// Tests HTMLParser.
+class Test : public CppUnit::TestFixture
+{
+public:
+    void testTdf114428();
+
+    CPPUNIT_TEST_SUITE(Test);
+    CPPUNIT_TEST(testTdf114428);
+    CPPUNIT_TEST_SUITE_END();
+};
+
+void Test::testTdf114428()
+{
+    SvMemoryStream aStream;
+    OString aDocument("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<html>hello</html>");
+    aStream.WriteBytes(aDocument.getStr(), aDocument.getLength());
+    aStream.Seek(0);
+
+    tools::SvRef<TestHTMLParser> xParser = new TestHTMLParser(aStream);
+    xParser->CallParser();
+
+    // This was '<?xml version="1.0" encoding="utf-8"?> hello', XML declaration
+    // was not ignored.
+    CPPUNIT_ASSERT_EQUAL(OUString("hello"), xParser->m_aDocument.trim());
+}
+
+CPPUNIT_TEST_SUITE_REGISTRATION(Test);
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/svtools/source/svhtml/parhtml.cxx b/svtools/source/svhtml/parhtml.cxx
index 9e5974ae5d40..7ea1750974be 100644
--- a/svtools/source/svhtml/parhtml.cxx
+++ b/svtools/source/svhtml/parhtml.cxx
@@ -1042,7 +1042,8 @@ HtmlTokenId HTMLParser::GetNextToken_()
                     bOffState = true;
                     nNextCh = GetNextChar();
                 }
-                if( rtl::isAsciiAlpha( nNextCh ) || '!'==nNextCh )
+                // Assume '<?' is a start of an XML declaration, ignore it.
+                if (rtl::isAsciiAlpha(nNextCh) || nNextCh == '!' || nNextCh == '?')
                 {
                     OUStringBuffer sTmpBuffer;
                     do {
commit 14daba5bd0ba64ff53ad98de7a84537ff03024ea
Author: Miklos Vajna <vmiklos at collabora.co.uk>
Date:   Wed Dec 13 11:13:40 2017 +0100

    Related: tdf#114428 filter: associate .xhtml with HTML import
    
    There is no dedicated XHTML import, so associate .xhtml with the HTML
    import; this way the expensive "deep" detection (which calls into all
    the DLP libs, etc.) can be avoided.
    
    Timing for a hello world input: the time spent in
    Desktop::loadComponentFromURL() drops from 56 ms to 23 ms (41% of the
    original).
    
    Change-Id: Ia2dec3837cf0c548ae2c5a0ca4d47a57a6cbb92a
    Reviewed-on: https://gerrit.libreoffice.org/46387
    Tested-by: Jenkins <ci at libreoffice.org>
    Reviewed-by: Miklos Vajna <vmiklos at collabora.co.uk>

diff --git a/filter/source/config/fragments/types/generic_HTML.xcu b/filter/source/config/fragments/types/generic_HTML.xcu
index b00b048d3842..b29ba333aded 100644
--- a/filter/source/config/fragments/types/generic_HTML.xcu
+++ b/filter/source/config/fragments/types/generic_HTML.xcu
@@ -18,7 +18,7 @@
     <node oor:name="generic_HTML" oor:op="replace" >
         <prop oor:name="DetectService"><value>com.sun.star.comp.filters.PlainTextFilterDetect</value></prop>
         <prop oor:name="URLPattern"><value>private:factory/swriter/web*</value></prop>
-        <prop oor:name="Extensions"><value>html htm</value></prop>
+        <prop oor:name="Extensions"><value>html htm xhtml</value></prop>
         <prop oor:name="MediaType"><value>text/html</value></prop>
         <prop oor:name="Preferred"><value>false</value></prop>
         <prop oor:name="PreferredFilter"><value>HTML</value></prop>


More information about the Libreoffice-commits mailing list