[Libreoffice-commits] core.git: filter/Library_htmlfd.mk filter/Library_textfd.mk filter/Module_filter.mk filter/source postprocess/Rdb_services.mk Repository.mk solenv/gbuild sw/CppunitTest_sw_htmlexport.mk

Maxim Monastirsky momonasmon at gmail.com
Sat Mar 1 12:46:09 PST 2014


 Repository.mk                                         |    1 
 filter/Library_htmlfd.mk                              |   36 --
 filter/Library_textfd.mk                              |    5 
 filter/Module_filter.mk                               |    1 
 filter/source/config/fragments/types/generic_HTML.xcu |    2 
 filter/source/htmlfilterdetect/fdcomp.cxx             |   36 --
 filter/source/htmlfilterdetect/filterdetect.cxx       |  232 ------------------
 filter/source/htmlfilterdetect/filterdetect.hxx       |   64 ----
 filter/source/htmlfilterdetect/htmlfd.component       |   15 -
 filter/source/textfilterdetect/filterdetect.cxx       |  178 +++++++++----
 postprocess/Rdb_services.mk                           |    1 
 solenv/gbuild/extensions/pre_MergedLibsList.mk        |    1 
 sw/CppunitTest_sw_htmlexport.mk                       |    2 
 13 files changed, 128 insertions(+), 446 deletions(-)

New commits:
commit d5890e87ab5e298e9a74ed0d552b01a98e59b1fa
Author: Maxim Monastirsky <momonasmon at gmail.com>
Date:   Sat Mar 1 20:03:41 2014 +0200

    Merge HTML detection to text detection service
    
    Setting the filter name explicitly is not strictly required,
    because TypeDetection::impl_checkResultsAndAddBestFilter
    is able to select the correct filter anyway. However, it seems
    that other detection services also set it, so I did the same.
    
    Change-Id: I6e73fa79c6867d82f98d63e8d2b7865446f088ef
    Reviewed-on: https://gerrit.libreoffice.org/8213
    Reviewed-by: Kohei Yoshida <libreoffice at kohei.us>
    Tested-by: Kohei Yoshida <libreoffice at kohei.us>

diff --git a/Repository.mk b/Repository.mk
index ba433f3..671f0dc 100644
--- a/Repository.mk
+++ b/Repository.mk
@@ -271,7 +271,6 @@ $(eval $(call gb_Helper_register_libraries_for_install,OOOLIBS,ooo, \
 	$(if $(ENABLE_DIRECTX),gdipluscanvas) \
 	guesslang \
 	$(if $(filter DESKTOP,$(BUILD_TYPE)),helplinker) \
-	htmlfd \
 	i18npool \
 	i18nsearch \
 	hyphen \
diff --git a/filter/Library_htmlfd.mk b/filter/Library_htmlfd.mk
deleted file mode 100644
index cfb708b..0000000
--- a/filter/Library_htmlfd.mk
+++ /dev/null
@@ -1,36 +0,0 @@
-# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*-
-#*************************************************************************
-#
-# This file is part of the LibreOffice project.
-#
-# This Source Code Form is subject to the terms of the Mozilla Public
-# License, v. 2.0. If a copy of the MPL was not distributed with this
-# file, You can obtain one at http://mozilla.org/MPL/2.0/.
-#
-#*************************************************************************
-
-$(eval $(call gb_Library_Library,htmlfd))
-
-$(eval $(call gb_Library_set_componentfile,htmlfd,filter/source/htmlfilterdetect/htmlfd))
-
-$(eval $(call gb_Library_use_external,htmlfd,boost_headers))
-
-$(eval $(call gb_Library_use_sdk_api,htmlfd))
-
-$(eval $(call gb_Library_use_libraries,htmlfd,\
-	ucbhelper \
-	cppuhelper \
-	cppu \
-	sal \
-	tl \
-	utl \
-	svt \
-	$(gb_UWINAPI) \
-))
-
-$(eval $(call gb_Library_add_exception_objects,htmlfd,\
-	filter/source/htmlfilterdetect/fdcomp \
-	filter/source/htmlfilterdetect/filterdetect \
-))
-
-# vim: set noet sw=4 ts=4:
diff --git a/filter/Library_textfd.mk b/filter/Library_textfd.mk
index ac1cda3..ef470b7 100644
--- a/filter/Library_textfd.mk
+++ b/filter/Library_textfd.mk
@@ -13,14 +13,19 @@ $(eval $(call gb_Library_Library,textfd))
 
 $(eval $(call gb_Library_set_componentfile,textfd,filter/source/textfilterdetect/textfd))
 
+$(eval $(call gb_Library_use_external,textfd,boost_headers))
+
 $(eval $(call gb_Library_use_sdk_api,textfd))
 
 $(eval $(call gb_Library_use_libraries,textfd,\
+	comphelper \
 	ucbhelper \
 	cppuhelper \
 	cppu \
 	sal \
 	tl \
+	utl \
+	svt \
 	$(gb_UWINAPI) \
 ))
 
diff --git a/filter/Module_filter.mk b/filter/Module_filter.mk
index 58307b4..403184a 100644
--- a/filter/Module_filter.mk
+++ b/filter/Module_filter.mk
@@ -34,7 +34,6 @@ $(eval $(call gb_Module_add_targets,filter,\
 	    Library_exp) \
 	Library_filterconfig \
 	Library_flash \
-	Library_htmlfd \
 	Library_icd \
 	Library_icg \
 	Library_idx \
diff --git a/filter/source/config/fragments/types/generic_HTML.xcu b/filter/source/config/fragments/types/generic_HTML.xcu
index 58ffedc..b00b048 100644
--- a/filter/source/config/fragments/types/generic_HTML.xcu
+++ b/filter/source/config/fragments/types/generic_HTML.xcu
@@ -16,7 +16,7 @@
  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 -->
     <node oor:name="generic_HTML" oor:op="replace" >
-        <prop oor:name="DetectService"><value>com.sun.star.comp.filters.HtmlFilterDetect</value></prop>
+        <prop oor:name="DetectService"><value>com.sun.star.comp.filters.PlainTextFilterDetect</value></prop>
         <prop oor:name="URLPattern"><value>private:factory/swriter/web*</value></prop>
         <prop oor:name="Extensions"><value>html htm</value></prop>
         <prop oor:name="MediaType"><value>text/html</value></prop>
diff --git a/filter/source/htmlfilterdetect/fdcomp.cxx b/filter/source/htmlfilterdetect/fdcomp.cxx
deleted file mode 100644
index 40360e9..0000000
--- a/filter/source/htmlfilterdetect/fdcomp.cxx
+++ /dev/null
@@ -1,36 +0,0 @@
-/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
-/*
- * This file is part of the LibreOffice project.
- *
- * This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/.
- */
-
-#include <sal/config.h>
-
-#include <cppuhelper/factory.hxx>
-#include <cppuhelper/implementationentry.hxx>
-#include <sal/types.h>
-
-#include "filterdetect.hxx"
-
-namespace {
-
-static cppu::ImplementationEntry const services[] = {
-    { &HtmlFilterDetect_createInstance, &HtmlFilterDetect_getImplementationName,
-      &HtmlFilterDetect_getSupportedServiceNames,
-      &cppu::createSingleComponentFactory, 0, 0 },
-    { 0, 0, 0, 0, 0, 0 }
-};
-
-}
-
-extern "C" SAL_DLLPUBLIC_EXPORT void * SAL_CALL htmlfd_component_getFactory(
-    char const * pImplName, void * pServiceManager, void * pRegistryKey)
-{
-    return cppu::component_getFactoryHelper(
-        pImplName, pServiceManager, pRegistryKey, services);
-}
-
-/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/filter/source/htmlfilterdetect/filterdetect.cxx b/filter/source/htmlfilterdetect/filterdetect.cxx
deleted file mode 100644
index 5b617c4..0000000
--- a/filter/source/htmlfilterdetect/filterdetect.cxx
+++ /dev/null
@@ -1,232 +0,0 @@
-/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
-/*
- * This file is part of the LibreOffice project.
- *
- * This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/.
- */
-
-#include "filterdetect.hxx"
-
-#include <svtools/htmltokn.h>
-#include <tools/urlobj.hxx>
-#include <ucbhelper/content.hxx>
-#include <unotools/mediadescriptor.hxx>
-#include <unotools/ucbstreamhelper.hxx>
-
-#include <com/sun/star/io/XInputStream.hpp>
-#include <cppuhelper/supportsservice.hxx>
-
-#include <boost/scoped_ptr.hpp>
-
-using com::sun::star::io::XInputStream;
-using com::sun::star::uno::Sequence;
-using com::sun::star::uno::Reference;
-using com::sun::star::uno::Any;
-using com::sun::star::uno::XComponentContext;
-using com::sun::star::uno::XInterface;
-using com::sun::star::uno::Exception;
-using com::sun::star::uno::RuntimeException;
-using com::sun::star::ucb::XCommandEnvironment;
-
-using namespace com::sun::star;
-using namespace com::sun::star::beans;
-
-namespace {
-
-enum DetectPhase {
-    BeforeTag,
-    TagOpened,
-    InTagName
-};
-
-bool isHTMLStream(const OString& aStreamHeader)
-{
-    const char* pHeader = aStreamHeader.getStr();
-    const int   nLength = aStreamHeader.getLength();
-    int nStartOfTagIndex = 0;
-    int i = 0;
-
-    DetectPhase dp = BeforeTag;
-
-    for ( i = 0; i < nLength; ++i, ++pHeader )
-    {
-        char c = *pHeader;
-        if ( c == ' ' || c == '\n' || c == '\t' || c == '\r' || c == '\f' )
-        {
-            if ( dp == TagOpened )
-                return false; // Invalid: Should start with a tag name
-            else if ( dp == InTagName )
-                break; // End of tag name reached
-        }
-        else if ( c == '<' )
-        {
-            if ( dp == BeforeTag )
-                dp = TagOpened;
-            else
-                return false; // Invalid: Nested '<'
-        }
-        else if ( c == '>' )
-        {
-            if ( dp == InTagName )
-                break; // End of tag name reached
-            else
-                return false; // Invalid: Empty tag or before '<'
-        }
-        else if ( c == '!' )
-        {
-            if ( dp == TagOpened )
-                return true; // "<!" - DOCTYPE or comments block
-            else
-                return false; // Invalid: '!' before '<' or inside tag name
-        }
-        else
-        {
-            if ( dp == BeforeTag )
-                return false; // Invalid: Should start with a tag
-            else if ( dp == TagOpened )
-            {
-                nStartOfTagIndex = i;
-                dp = InTagName;
-            }
-        }
-    }
-
-    // The string following '<' has to be a known HTML token.
-    if ( GetHTMLToken( OStringToOUString( aStreamHeader.copy( nStartOfTagIndex, i - nStartOfTagIndex ),
-                                                RTL_TEXTENCODING_ASCII_US ) ) != 0 )
-        return true;
-
-    return false;
-}
-
-}
-
-OUString SAL_CALL HtmlFilterDetect::detect(Sequence<PropertyValue>& lDescriptor)
-    throw (RuntimeException, std::exception)
-{
-    OUString sUrl;
-    OUString sDocService;
-    OString  resultString;
-    Reference<XInputStream> xInStream;
-
-    const PropertyValue *pValue = lDescriptor.getConstArray();
-    sal_Int32 nLength  = lDescriptor.getLength();
-    sal_Int32 location = nLength;
-
-    for ( sal_Int32 i = 0; i < nLength; ++i )
-    {
-        if ( pValue[i].Name == utl::MediaDescriptor::PROP_URL() )
-            pValue[i].Value >>= sUrl;
-        else if ( pValue[i].Name == utl::MediaDescriptor::PROP_INPUTSTREAM() )
-            pValue[i].Value >>= xInStream;
-        else if ( pValue[i].Name == utl::MediaDescriptor::PROP_DOCUMENTSERVICE() )
-        {
-            location = i;
-            pValue[i].Value >>= sDocService;
-        }
-    }
-
-    try
-    {
-        if ( !xInStream.is() )
-        {
-            ucbhelper::Content aContent( sUrl, Reference<XCommandEnvironment>(), mxCtx );
-            xInStream = aContent.openStream();
-            if ( !xInStream.is() )
-                return OUString();
-        }
-
-        boost::scoped_ptr<SvStream> pInStream( utl::UcbStreamHelper::CreateStream( xInStream ) );
-        if ( !pInStream || pInStream->GetError() )
-            return OUString();
-
-        pInStream->StartReadingUnicodeText( RTL_TEXTENCODING_DONTKNOW );
-        sal_Size nUniPos = pInStream->Tell();
-
-        const sal_uInt16 nSize = 4096;
-
-        if ( nUniPos == 3 || nUniPos == 0 ) // UTF-8 or non-Unicode
-            resultString = read_uInt8s_ToOString( *pInStream, nSize );
-        else // UTF-16
-            resultString = OUStringToOString( read_uInt16s_ToOUString( *pInStream, nSize ), RTL_TEXTENCODING_ASCII_US );
-
-        if ( isHTMLStream( resultString.toAsciiLowerCase() ) )
-        {
-            // Some Apps/Web services use ".xls" extension to indicate that
-            // the given file should be opened by a spreadsheet software
-            if ( sDocService.isEmpty() )
-            {
-                INetURLObject aParser( sUrl );
-                OUString aExt = aParser.getExtension( INetURLObject::LAST_SEGMENT, true, INetURLObject::DECODE_WITH_CHARSET );
-                aExt = aExt.toAsciiLowerCase();
-
-                if ( aExt == "xls" )
-                {
-                    if ( location == lDescriptor.getLength() )
-                    {
-                        lDescriptor.realloc( location + 1 );
-                        lDescriptor[location].Name = utl::MediaDescriptor::PROP_DOCUMENTSERVICE();
-                    }
-                    lDescriptor[location].Value <<= OUString( "com.sun.star.sheet.SpreadsheetDocument" );
-                }
-            }
-            return OUString( "generic_HTML" );
-        }
-    }
-    catch (const Exception &)
-    {
-        OSL_FAIL( "An Exception occurred while opening File stream" );
-    }
-
-    return OUString(); // Failed
-}
-
-// XInitialization
-
-void SAL_CALL HtmlFilterDetect::initialize(const Sequence<Any>& /*aArguments*/)
-    throw (Exception, RuntimeException, std::exception)
-{
-}
-
-OUString HtmlFilterDetect_getImplementationName()
-{
-    return OUString( "com.sun.star.comp.filters.HtmlFilterDetect" );
-}
-
-Sequence<OUString> HtmlFilterDetect_getSupportedServiceNames()
-{
-    Sequence<OUString> aRet(2);
-    OUString* pArray = aRet.getArray();
-    pArray[0] = "com.sun.star.document.ExtendedTypeDetection";
-    pArray[1] = "com.sun.star.comp.filters.HtmlFilterDetect";
-    return aRet;
-}
-
-Reference<XInterface> HtmlFilterDetect_createInstance(const Reference<XComponentContext>& rCtx)
-{
-    return (cppu::OWeakObject*) new HtmlFilterDetect( rCtx );
-}
-
-// XServiceInfo
-
-OUString SAL_CALL HtmlFilterDetect::getImplementationName()
-    throw (RuntimeException, std::exception)
-{
-    return HtmlFilterDetect_getImplementationName();
-}
-
-sal_Bool SAL_CALL HtmlFilterDetect::supportsService(const OUString& rServiceName)
-    throw (RuntimeException, std::exception)
-{
-    return cppu::supportsService( this, rServiceName );
-}
-
-Sequence<OUString> SAL_CALL HtmlFilterDetect::getSupportedServiceNames()
-    throw (RuntimeException, std::exception)
-{
-    return HtmlFilterDetect_getSupportedServiceNames();
-}
-
-/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/filter/source/htmlfilterdetect/filterdetect.hxx b/filter/source/htmlfilterdetect/filterdetect.hxx
deleted file mode 100644
index f8327af..0000000
--- a/filter/source/htmlfilterdetect/filterdetect.hxx
+++ /dev/null
@@ -1,64 +0,0 @@
-/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
-/*
- * This file is part of the LibreOffice project.
- *
- * This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/.
- */
-
-#ifndef INCLUDED_FILTER_SOURCE_HTMLFILTERDETECT_FILTERDETECT_HXX
-#define INCLUDED_FILTER_SOURCE_HTMLFILTERDETECT_FILTERDETECT_HXX
-
-#include <com/sun/star/document/XExtendedFilterDetection.hpp>
-#include <com/sun/star/lang/XInitialization.hpp>
-#include <com/sun/star/lang/XServiceInfo.hpp>
-#include <com/sun/star/uno/XComponentContext.hpp>
-
-#include <cppuhelper/implbase3.hxx>
-
-class HtmlFilterDetect : public cppu::WeakImplHelper3<
-    com::sun::star::document::XExtendedFilterDetection,
-    com::sun::star::lang::XInitialization,
-    com::sun::star::lang::XServiceInfo>
-{
-    com::sun::star::uno::Reference<com::sun::star::uno::XComponentContext> mxCtx;
-
-public:
-
-    HtmlFilterDetect(const com::sun::star::uno::Reference<com::sun::star::uno::XComponentContext>& xCtx) :
-        mxCtx(xCtx) {}
-    virtual ~HtmlFilterDetect() {}
-
-    // XExtendedFilterDetection
-
-    virtual OUString SAL_CALL detect(com::sun::star::uno::Sequence<com::sun::star::beans::PropertyValue>& lDescriptor)
-        throw (com::sun::star::uno::RuntimeException, std::exception);
-
-    // XInitialization
-
-    virtual void SAL_CALL initialize(const ::com::sun::star::uno::Sequence<com::sun::star::uno::Any>& aArguments)
-        throw (com::sun::star::uno::Exception, com::sun::star::uno::RuntimeException, std::exception);
-
-    // XServiceInfo
-
-    virtual OUString SAL_CALL getImplementationName()
-        throw (com::sun::star::uno::RuntimeException, std::exception);
-
-    virtual sal_Bool SAL_CALL supportsService(const OUString& ServiceName)
-        throw (com::sun::star::uno::RuntimeException, std::exception);
-
-    virtual com::sun::star::uno::Sequence<OUString> SAL_CALL getSupportedServiceNames()
-        throw (com::sun::star::uno::RuntimeException, std::exception);
-};
-
-OUString HtmlFilterDetect_getImplementationName();
-
-com::sun::star::uno::Sequence<OUString> HtmlFilterDetect_getSupportedServiceNames();
-
-com::sun::star::uno::Reference<com::sun::star::uno::XInterface>
-HtmlFilterDetect_createInstance(const com::sun::star::uno::Reference<com::sun::star::uno::XComponentContext>& rCtx);
-
-#endif
-
-/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/filter/source/htmlfilterdetect/htmlfd.component b/filter/source/htmlfilterdetect/htmlfd.component
deleted file mode 100644
index 32c41b8..0000000
--- a/filter/source/htmlfilterdetect/htmlfd.component
+++ /dev/null
@@ -1,15 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
- * This file is part of the LibreOffice project.
- *
- * This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/.
- -->
-
-<component loader="com.sun.star.loader.SharedLibrary" environment="@CPPU_ENV@"
-    prefix="htmlfd" xmlns="http://openoffice.org/2010/uno-components">
-  <implementation name="com.sun.star.comp.filters.HtmlFilterDetect">
-    <service name="com.sun.star.document.ExtendedTypeDetection"/>
-  </implementation>
-</component>
diff --git a/filter/source/textfilterdetect/filterdetect.cxx b/filter/source/textfilterdetect/filterdetect.cxx
index aea331f..ffad7fa 100644
--- a/filter/source/textfilterdetect/filterdetect.cxx
+++ b/filter/source/textfilterdetect/filterdetect.cxx
@@ -9,32 +9,107 @@
 
 #include "filterdetect.hxx"
 
-#include "tools/urlobj.hxx"
-#include "ucbhelper/content.hxx"
+#include <svtools/htmltokn.h>
+#include <tools/urlobj.hxx>
+#include <ucbhelper/content.hxx>
+#include <unotools/mediadescriptor.hxx>
+#include <unotools/ucbstreamhelper.hxx>
 
 #include <com/sun/star/lang/XMultiServiceFactory.hpp>
 #include <com/sun/star/io/XInputStream.hpp>
 #include <cppuhelper/supportsservice.hxx>
+#include <boost/scoped_ptr.hpp>
 
 #define WRITER_TEXT_FILTER "Text"
 #define CALC_TEXT_FILTER   "Text - txt - csv (StarCalc)"
 
+#define WEB_HTML_FILTER    "HTML"
+#define WRITER_HTML_FILTER "HTML (StarWriter)"
+#define CALC_HTML_FILTER   "calc_HTML_WebQuery"
+
+#define WRITER_DOCSERVICE  "com.sun.star.text.TextDocument"
+#define CALC_DOCSERVICE    "com.sun.star.sheet.SpreadsheetDocument"
+
 using namespace ::com::sun::star;
+using utl::MediaDescriptor;
 
 namespace {
 
-template<typename T>
-void setPropValue(uno::Sequence<beans::PropertyValue>& rProps, sal_Int32 nPos, const char* pName, const T& rValue)
+bool IsHTMLStream( const uno::Reference<io::XInputStream>& xInStream )
 {
-    if (nPos >= 0)
-        rProps[nPos].Value <<= rValue;
-    else
+    boost::scoped_ptr<SvStream> pInStream( utl::UcbStreamHelper::CreateStream( xInStream ) );
+    if ( !pInStream || pInStream->GetError() )
+        // No stream
+        return false;
+
+    // Read the stream header
+    pInStream->StartReadingUnicodeText( RTL_TEXTENCODING_DONTKNOW );
+    const sal_Size nUniPos = pInStream->Tell();
+    const sal_uInt16 nSize = 4096;
+
+    OString sHeader;
+    if ( nUniPos == 3 || nUniPos == 0 ) // UTF-8 or non-Unicode
+        sHeader = read_uInt8s_ToOString( *pInStream, nSize );
+    else // UTF-16 (nUniPos = 2)
+        sHeader = OUStringToOString( read_uInt16s_ToOUString( *pInStream, nSize ), RTL_TEXTENCODING_ASCII_US );
+
+    // Now check whether the stream begins with a known HTML tag.
+    enum DetectPhase { BeforeTag, TagOpened, InTagName };
+    DetectPhase dp = BeforeTag;
+
+    const char* pHeader = sHeader.getStr();
+    const int   nLength = sHeader.getLength();
+    int i = 0, nStartOfTagIndex = 0;
+
+    for ( i = 0; i < nLength; ++i, ++pHeader )
     {
-        sal_Int32 n = rProps.getLength();
-        rProps.realloc(n+1);
-        rProps[n].Name = OUString::createFromAscii(pName);
-        rProps[n].Value <<= rValue;
+        char c = *pHeader;
+        if ( c == ' ' || c == '\n' || c == '\t' || c == '\r' || c == '\f' )
+        {
+            if ( dp == TagOpened )
+                return false; // Invalid: Should start with a tag name
+            else if ( dp == InTagName )
+                break; // End of tag name reached
+        }
+        else if ( c == '<' )
+        {
+            if ( dp == BeforeTag )
+                dp = TagOpened;
+            else
+                return false; // Invalid: Nested '<'
+        }
+        else if ( c == '>' )
+        {
+            if ( dp == InTagName )
+                break; // End of tag name reached
+            else
+                return false; // Invalid: Empty tag or before '<'
+        }
+        else if ( c == '!' )
+        {
+            if ( dp == TagOpened )
+                return true; // "<!" - DOCTYPE or comments block
+            else
+                return false; // Invalid: '!' before '<' or inside tag name
+        }
+        else
+        {
+            if ( dp == BeforeTag )
+                return false; // Invalid: Should start with a tag
+            else if ( dp == TagOpened )
+            {
+                nStartOfTagIndex = i;
+                dp = InTagName;
+            }
+        }
     }
+
+    // The string following '<' has to be a known HTML token.
+    OString aToken = sHeader.copy( nStartOfTagIndex, i - nStartOfTagIndex );
+    if ( GetHTMLToken( OStringToOUString( aToken.toAsciiLowerCase(), RTL_TEXTENCODING_ASCII_US ) ) != 0 )
+        return true;
+
+    return false;
 }
 
 }
@@ -46,65 +121,54 @@ PlainTextFilterDetect::~PlainTextFilterDetect() {}
 
 OUString SAL_CALL PlainTextFilterDetect::detect(uno::Sequence<beans::PropertyValue>& lDescriptor) throw (uno::RuntimeException, std::exception)
 {
-    OUString aType;
-    OUString aDocService;
-    OUString aExt;
-    OUString aUrl;
+    MediaDescriptor aMediaDesc(lDescriptor);
 
-    sal_Int32 nFilter = -1;
+    OUString aType = aMediaDesc.getUnpackedValueOrDefault(MediaDescriptor::PROP_TYPENAME(), OUString() );
+    OUString aDocService = aMediaDesc.getUnpackedValueOrDefault(MediaDescriptor::PROP_DOCUMENTSERVICE(), OUString() );
+    OUString aUrl = aMediaDesc.getUnpackedValueOrDefault(MediaDescriptor::PROP_URL(), OUString() );
 
-    for (sal_Int32 i = 0, n = lDescriptor.getLength(); i < n; ++i)
-    {
-        if (lDescriptor[i].Name == "TypeName")
-            lDescriptor[i].Value >>= aType;
-        else if (lDescriptor[i].Name == "FilterName")
-            nFilter = i;
-        else if (lDescriptor[i].Name == "DocumentService")
-            lDescriptor[i].Value >>= aDocService;
-        else if (lDescriptor[i].Name == "URL")
-        {
-            lDescriptor[i].Value >>= aUrl;
+    // Get the file name extension.
+    INetURLObject aParser(aUrl);
+    OUString aExt = aParser.getExtension(INetURLObject::LAST_SEGMENT, true, INetURLObject::DECODE_WITH_CHARSET);
+    aExt = aExt.toAsciiLowerCase();
 
-            // Get the file name extension.
-            INetURLObject aParser(aUrl);
-            aExt = aParser.getExtension(
-                INetURLObject::LAST_SEGMENT, true, INetURLObject::DECODE_WITH_CHARSET);
-            aExt = aExt.toAsciiLowerCase();
-        }
-    }
-
-    if (aType == "generic_Text")
+    if (aType == "generic_HTML")
     {
-        // Generic text type.
+        uno::Reference<io::XInputStream> xInStream(aMediaDesc[MediaDescriptor::PROP_INPUTSTREAM()], uno::UNO_QUERY);
+        if (!xInStream.is() || !IsHTMLStream(xInStream))
+            return OUString();
 
         // Decide which filter to use based on the document service first,
         // then on extension if that's not available.
 
-        if (aDocService == "com.sun.star.sheet.SpreadsheetDocument")
-            // Open it in Calc.
-            setPropValue(lDescriptor, nFilter, "FilterName", OUString(CALC_TEXT_FILTER));
-        else if (aDocService == "com.sun.star.text.TextDocument")
-            // Open it in Writer.
-            setPropValue(lDescriptor, nFilter, "FilterName", OUString(WRITER_TEXT_FILTER));
-        else if (aExt == "csv")
-            setPropValue(lDescriptor, nFilter, "FilterName", OUString(CALC_TEXT_FILTER));
-        else if (aExt == "tsv")
-            setPropValue(lDescriptor, nFilter, "FilterName", OUString(CALC_TEXT_FILTER));
-        else if (aExt == "tab")
-            setPropValue(lDescriptor, nFilter, "FilterName", OUString(CALC_TEXT_FILTER));
+        if (aDocService == CALC_DOCSERVICE)
+            aMediaDesc[MediaDescriptor::PROP_FILTERNAME()] <<= OUString(CALC_HTML_FILTER);
+        else if (aDocService == WRITER_DOCSERVICE)
+            aMediaDesc[MediaDescriptor::PROP_FILTERNAME()] <<= OUString(WRITER_HTML_FILTER);
         else if (aExt == "xls")
-            setPropValue(lDescriptor, nFilter, "FilterName", OUString(CALC_TEXT_FILTER));
-        else if (aExt == "txt")
-            setPropValue(lDescriptor, nFilter, "FilterName", OUString(WRITER_TEXT_FILTER));
+            aMediaDesc[MediaDescriptor::PROP_FILTERNAME()] <<= OUString(CALC_HTML_FILTER);
         else
-            // No clue.  Open it in Writer by default.
-            setPropValue(lDescriptor, nFilter, "FilterName", OUString(WRITER_TEXT_FILTER));
+            aMediaDesc[MediaDescriptor::PROP_FILTERNAME()] <<= OUString(WEB_HTML_FILTER);
+    }
 
-        return aType;
+    else if (aType == "generic_Text")
+    {
+        if (aDocService == CALC_DOCSERVICE)
+            aMediaDesc[MediaDescriptor::PROP_FILTERNAME()] <<= OUString(CALC_TEXT_FILTER);
+        else if (aDocService == WRITER_DOCSERVICE)
+            aMediaDesc[MediaDescriptor::PROP_FILTERNAME()] <<= OUString(WRITER_TEXT_FILTER);
+        else if (aExt == "csv" || aExt == "tsv" || aExt == "tab" || aExt == "xls")
+            aMediaDesc[MediaDescriptor::PROP_FILTERNAME()] <<= OUString(CALC_TEXT_FILTER);
+        else
+            aMediaDesc[MediaDescriptor::PROP_FILTERNAME()] <<= OUString(WRITER_TEXT_FILTER);
     }
 
-    // failed!
-    return OUString();
+    else
+        // Nothing to detect.
+        return OUString();
+
+    aMediaDesc >> lDescriptor;
+    return aType;
 }
 
 // XInitialization
diff --git a/postprocess/Rdb_services.mk b/postprocess/Rdb_services.mk
index 1f9ca5b..48d4c07 100755
--- a/postprocess/Rdb_services.mk
+++ b/postprocess/Rdb_services.mk
@@ -29,7 +29,6 @@ $(eval $(call gb_Rdb_add_components,services,\
 	filter/source/config/cache/filterconfig1 \
 	filter/source/flash/flash \
 	filter/source/graphic/graphicfilter \
-	filter/source/htmlfilterdetect/htmlfd \
 	filter/source/msfilter/msfilter \
 	filter/source/odfflatxml/odfflatxml \
 	filter/source/pdf/pdffilter \
diff --git a/solenv/gbuild/extensions/pre_MergedLibsList.mk b/solenv/gbuild/extensions/pre_MergedLibsList.mk
index ba7ad86..9cc2079 100644
--- a/solenv/gbuild/extensions/pre_MergedLibsList.mk
+++ b/solenv/gbuild/extensions/pre_MergedLibsList.mk
@@ -46,7 +46,6 @@ gb_EXTRAMERGEDLIBS := \
 	graphicfilter \
 	guesslang \
 	$(if $(ENABLE_JAVA),hsqldb) \
-	htmlfd \
 	hyphen \
 	icd \
 	icg \
diff --git a/sw/CppunitTest_sw_htmlexport.mk b/sw/CppunitTest_sw_htmlexport.mk
index 0345d4c..2da0c3f 100644
--- a/sw/CppunitTest_sw_htmlexport.mk
+++ b/sw/CppunitTest_sw_htmlexport.mk
@@ -56,7 +56,7 @@ $(eval $(call gb_CppunitTest_use_components,sw_htmlexport,\
     configmgr/source/configmgr \
     embeddedobj/util/embobj \
     filter/source/config/cache/filterconfig1 \
-    filter/source/htmlfilterdetect/htmlfd \
+    filter/source/textfilterdetect/textfd \
 	forms/util/frm \
     framework/util/fwk \
     i18npool/util/i18npool \


More information about the Libreoffice-commits mailing list