[Libreoffice-commits] core.git: Branch 'libreoffice-6-0' - filter/CppunitTest_filter_textfilterdetect.mk filter/Module_filter.mk filter/qa filter/source

Miklos Vajna vmiklos at collabora.co.uk
Sat Jan 27 20:04:00 UTC 2018


 filter/CppunitTest_filter_textfilterdetect.mk   |   46 +++++++++++++++++
 filter/Module_filter.mk                         |    1 
 filter/qa/unit/data/tdf114428.xhtml             |    9 +++
 filter/qa/unit/textfilterdetect.cxx             |   63 ++++++++++++++++++++++++
 filter/source/textfilterdetect/filterdetect.cxx |   24 ++++++++-
 5 files changed, 140 insertions(+), 3 deletions(-)

New commits:
commit 6aa65f7664fe0dbe8c9d4ba7f320ef216e928780
Author: Miklos Vajna <vmiklos at collabora.co.uk>
Date:   Wed Dec 13 09:49:41 2017 +0100

    tdf#114428 filter: recognize XHTML with XML declaration as HTML
    
    The problem was the additional
    
            <?xml version="1.0" encoding="utf-8"?>
    
    XML declaration before the usual
    
            <!DOCTYPE html ...
    
    line; the detection now simply ignores that declaration.
    
    Change-Id: I294aae5504b40b42f76da00fef645d0d89009da9
    Reviewed-on: https://gerrit.libreoffice.org/46324
    Reviewed-by: Miklos Vajna <vmiklos at collabora.co.uk>
    Tested-by: Jenkins <ci at libreoffice.org>
    (cherry picked from commit 4af729f31c64c09c76ea8bcfa5067092571b92de)
    Reviewed-on: https://gerrit.libreoffice.org/47587
    Reviewed-by: Caolán McNamara <caolanm at redhat.com>
    Tested-by: Caolán McNamara <caolanm at redhat.com>

diff --git a/filter/CppunitTest_filter_textfilterdetect.mk b/filter/CppunitTest_filter_textfilterdetect.mk
new file mode 100644
index 000000000000..dfcaee9ce16a
--- /dev/null
+++ b/filter/CppunitTest_filter_textfilterdetect.mk
@@ -0,0 +1,46 @@
+# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*-
+#
+# This file is part of the LibreOffice project.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+
+$(eval $(call gb_CppunitTest_CppunitTest,filter_textfilterdetect))
+
+$(eval $(call gb_CppunitTest_use_api,filter_textfilterdetect,\
+	offapi \
+	udkapi \
+))
+
+$(eval $(call gb_CppunitTest_use_libraries,filter_textfilterdetect, \
+	comphelper \
+	cppu \
+	cppuhelper \
+	sal \
+	test \
+	textfd \
+	tl \
+	unotest \
+	utl \
+))
+
+$(eval $(call gb_CppunitTest_add_exception_objects,filter_textfilterdetect, \
+	filter/qa/unit/textfilterdetect \
+))
+
+$(eval $(call gb_CppunitTest_use_ure,filter_textfilterdetect))
+
+$(eval $(call gb_CppunitTest_use_vcl,filter_textfilterdetect))
+
+$(eval $(call gb_CppunitTest_use_components,filter_textfilterdetect,\
+    configmgr/source/configmgr \
+    filter/source/textfilterdetect/textfd \
+    ucb/source/core/ucb1 \
+    ucb/source/ucp/file/ucpfile1 \
+))
+
+$(eval $(call gb_CppunitTest_use_configuration,filter_textfilterdetect))
+
+# vim: set noet sw=4 ts=4:
diff --git a/filter/Module_filter.mk b/filter/Module_filter.mk
index a7834b6dc631..08aa0f2d395b 100644
--- a/filter/Module_filter.mk
+++ b/filter/Module_filter.mk
@@ -57,6 +57,7 @@ $(eval $(call gb_Module_add_check_targets,filter,\
     CppunitTest_filter_xslt \
     CppunitTest_filter_priority \
     CppunitTest_filter_msfilter \
+    CppunitTest_filter_textfilterdetect \
 ))
 
 ifneq ($(DISABLE_CVE_TESTS),TRUE)
diff --git a/filter/qa/unit/data/tdf114428.xhtml b/filter/qa/unit/data/tdf114428.xhtml
new file mode 100644
index 000000000000..f08f0fa4a028
--- /dev/null
+++ b/filter/qa/unit/data/tdf114428.xhtml
@@ -0,0 +1,9 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <title>Title of document</title>
+  </head>
+  <body>hello world</body>
+</html>
diff --git a/filter/qa/unit/textfilterdetect.cxx b/filter/qa/unit/textfilterdetect.cxx
new file mode 100644
index 000000000000..272ba85b330b
--- /dev/null
+++ b/filter/qa/unit/textfilterdetect.cxx
@@ -0,0 +1,63 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <com/sun/star/document/XExtendedFilterDetection.hpp>
+#include <com/sun/star/io/XInputStream.hpp>
+
+#include <comphelper/processfactory.hxx>
+#include <comphelper/propertyvalue.hxx>
+#include <test/bootstrapfixture.hxx>
+#include <unotools/mediadescriptor.hxx>
+#include <unotools/streamwrap.hxx>
+
+using namespace com::sun::star;
+
+namespace
+{
+/// Test class for PlainTextFilterDetect.
+class TextFilterDetectTest : public test::BootstrapFixture
+{
+public:
+    void testTdf114428();
+
+    CPPUNIT_TEST_SUITE(TextFilterDetectTest);
+    CPPUNIT_TEST(testTdf114428);
+    CPPUNIT_TEST_SUITE_END();
+};
+
+char const DATA_DIRECTORY[] = "/filter/qa/unit/data/";
+
+void TextFilterDetectTest::testTdf114428()
+{
+    uno::Reference<uno::XComponentContext> xComponentContext
+        = comphelper::getComponentContext(getMultiServiceFactory());
+    uno::Reference<document::XExtendedFilterDetection> xDetect(
+        getMultiServiceFactory()->createInstance("com.sun.star.comp.filters.PlainTextFilterDetect"),
+        uno::UNO_QUERY);
+    OUString aURL = m_directories.getURLFromSrc(DATA_DIRECTORY) + "tdf114428.xhtml";
+    SvFileStream aStream(aURL, StreamMode::READ);
+    uno::Reference<io::XInputStream> xStream(new utl::OStreamWrapper(aStream));
+    uno::Sequence<beans::PropertyValue> aDescriptor
+        = { comphelper::makePropertyValue("DocumentService",
+                                          OUString("com.sun.star.text.TextDocument")),
+            comphelper::makePropertyValue("InputStream", xStream),
+            comphelper::makePropertyValue("TypeName", OUString("generic_HTML")) };
+    xDetect->detect(aDescriptor);
+    utl::MediaDescriptor aMediaDesc(aDescriptor);
+    OUString aFilterName = aMediaDesc.getUnpackedValueOrDefault("FilterName", OUString());
+    // This was empty: the XML declaration caused HTML detection to not handle XHTML.
+    CPPUNIT_ASSERT_EQUAL(OUString("HTML (StarWriter)"), aFilterName);
+}
+
+CPPUNIT_TEST_SUITE_REGISTRATION(TextFilterDetectTest);
+}
+
+CPPUNIT_PLUGIN_IMPLEMENT();
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/filter/source/textfilterdetect/filterdetect.cxx b/filter/source/textfilterdetect/filterdetect.cxx
index ee93d28ddbed..3228ca53f62f 100644
--- a/filter/source/textfilterdetect/filterdetect.cxx
+++ b/filter/source/textfilterdetect/filterdetect.cxx
@@ -58,6 +58,13 @@ bool IsHTMLStream( const uno::Reference<io::XInputStream>& xInStream )
     // Now check whether the stream begins with a known HTML tag.
     enum DetectPhase { BeforeTag, TagOpened, InTagName };
     DetectPhase dp = BeforeTag;
+    /// BeforeDeclaration -> '?' -> DeclarationOpened -> '>' -> BeforeDeclaration.
+    enum DeclarationPhase
+    {
+        BeforeDeclaration,
+        DeclarationOpened
+    };
+    DeclarationPhase eDeclaration = BeforeDeclaration;
 
     const char* pHeader = sHeader.getStr();
     const int   nLength = sHeader.getLength();
@@ -66,7 +73,8 @@ bool IsHTMLStream( const uno::Reference<io::XInputStream>& xInStream )
     for ( i = 0; i < nLength; ++i, ++pHeader )
     {
         char c = *pHeader;
-        if ( c == ' ' || c == '\n' || c == '\t' || c == '\r' || c == '\f' )
+        if ((c == ' ' || c == '\n' || c == '\t' || c == '\r' || c == '\f')
+            && eDeclaration == BeforeDeclaration)
         {
             if ( dp == TagOpened )
                 return false; // Invalid: Should start with a tag name
@@ -84,6 +92,11 @@ bool IsHTMLStream( const uno::Reference<io::XInputStream>& xInStream )
         {
             if ( dp == InTagName )
                 break; // End of tag name reached
+            else if (eDeclaration == DeclarationOpened)
+            {
+                dp = BeforeTag;
+                eDeclaration = BeforeDeclaration;
+            }
             else
                 return false; // Invalid: Empty tag or before '<'
         }
@@ -100,8 +113,13 @@ bool IsHTMLStream( const uno::Reference<io::XInputStream>& xInStream )
                 return false; // Invalid: Should start with a tag
             else if ( dp == TagOpened )
             {
-                nStartOfTagIndex = i;
-                dp = InTagName;
+                if (c == '?' && eDeclaration == BeforeDeclaration)
+                    eDeclaration = DeclarationOpened;
+                else if (eDeclaration == BeforeDeclaration)
+                {
+                    nStartOfTagIndex = i;
+                    dp = InTagName;
+                }
             }
         }
     }


More information about the Libreoffice-commits mailing list