[Libreoffice-commits] core.git: editeng/CppunitTest_editeng_core.mk editeng/CustomTarget_generated.mk editeng/Library_editeng.mk editeng/Module_editeng.mk editeng/source solenv/bin

Daniel Sikeler d.sikeler94 at gmail.com
Tue Nov 11 01:37:19 PST 2014


 editeng/CppunitTest_editeng_core.mk                  |    1 
 editeng/CustomTarget_generated.mk                    |   27 +++++++
 editeng/Library_editeng.mk                           |    3 
 editeng/Module_editeng.mk                            |    1 
 editeng/source/misc/SvXMLAutoCorrectTokenHandler.cxx |   18 ++---
 editeng/source/misc/SvXMLAutoCorrectTokenHandler.hxx |    9 ++
 editeng/source/misc/tokens.txt                       |    7 ++
 solenv/bin/gentoken.pl                               |   65 +++++++++++++++++++
 8 files changed, 121 insertions(+), 10 deletions(-)

New commits:
commit 2b442c1df48fce7a265e983a902f4a30e3edabf1
Author: Daniel Sikeler <d.sikeler94 at gmail.com>
Date:   Mon Nov 3 11:29:48 2014 +0000

    fdo#80403: TokenHandler impl. FastTokenHandlerBase
    
    getTokenFromUTF8: calls getTokenDirect
    getTokenDirect: uses perfect hash
    
    New makefile creates perfect hash table with perl-script gentoken.pl.
    I found the script on https://wiki.openoffice.org/wiki/XFastTokenHandler and made some changes.
    XMLTokens are defined in tokens.txt
    
    Change-Id: Id04134a896ee082e2d0ba55a715fede19ff04928
    Reviewed-on: https://gerrit.libreoffice.org/12335
    Reviewed-by: Michael Meeks <michael.meeks at collabora.com>
    Tested-by: Michael Meeks <michael.meeks at collabora.com>

diff --git a/editeng/CppunitTest_editeng_core.mk b/editeng/CppunitTest_editeng_core.mk
index 488a5d0..f66caec 100644
--- a/editeng/CppunitTest_editeng_core.mk
+++ b/editeng/CppunitTest_editeng_core.mk
@@ -27,6 +27,7 @@ $(eval $(call gb_CppunitTest_use_libraries,editeng_core, \
     lng \
     sal \
     salhelper \
+    sax \
     sot \
     svl \
     svt \
diff --git a/editeng/CustomTarget_generated.mk b/editeng/CustomTarget_generated.mk
new file mode 100644
index 0000000..f3329df
--- /dev/null
+++ b/editeng/CustomTarget_generated.mk
@@ -0,0 +1,27 @@
+# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*-
+#
+# This file is part of the LibreOffice project.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+
+$(eval $(call gb_CustomTarget_CustomTarget,editeng/generated))
+# Module-prefixed names: recipes expand variables only when they run, so generic SRC/PL/INC could be overwritten by any other makefile in the same make run.
+editeng_SRC := $(SRCDIR)/editeng/source/misc
+editeng_PL := $(SRCDIR)/solenv/bin/gentoken.pl
+editeng_INC := $(call gb_CustomTarget_get_workdir,editeng/generated)
+# gentoken.pl turns the token list into gperf input.
+$(editeng_INC)/tokens.hxx $(editeng_INC)/tokens.gperf : $(editeng_SRC)/tokens.txt $(editeng_PL)
+	mkdir -p $(editeng_INC)
+	$(PERL) $(editeng_PL) $(editeng_SRC)/tokens.txt $(editeng_INC)/tokens.gperf
+# gperf emits the lookup table; sed then appends XML_TOKEN_INVALID to the null entries and drops the #line directives.
+$(editeng_INC)/tokens.cxx : $(editeng_INC)/tokens.gperf
+	$(GPERF) --compare-strncmp --readonly-tables --output-file=$(editeng_INC)/tokens.cxx $(editeng_INC)/tokens.gperf
+	sed -i -e "s/(char\*)0/(char\*)0, XML_TOKEN_INVALID/g" $(editeng_INC)/tokens.cxx
+	sed -i -e "/^#line/d" $(editeng_INC)/tokens.cxx
+
+$(call gb_CustomTarget_get_target,editeng/generated) : $(editeng_INC)/tokens.cxx
+
+# vim: set noet sw=4 ts=4:
diff --git a/editeng/Library_editeng.mk b/editeng/Library_editeng.mk
index c1cc094..22f4c08 100644
--- a/editeng/Library_editeng.mk
+++ b/editeng/Library_editeng.mk
@@ -24,6 +24,8 @@ $(eval $(call gb_Library_set_include,editeng,\
     -I$(SRCDIR)/editeng/inc \
 ))
 
+$(eval $(call gb_Library_use_custom_headers,editeng,editeng/generated))
+
 $(eval $(call gb_Library_set_precompiled_header,editeng,$(SRCDIR)/editeng/inc/pch/precompiled_editeng))
 
 $(eval $(call gb_Library_add_defs,editeng,\
@@ -140,6 +142,7 @@ $(eval $(call gb_Library_use_libraries,editeng,\
     cppu \
     sal \
     salhelper \
+    sax \
     i18nlangtag \
     i18nutil \
 	$(gb_UWINAPI) \
diff --git a/editeng/Module_editeng.mk b/editeng/Module_editeng.mk
index de8840b..22f3602 100644
--- a/editeng/Module_editeng.mk
+++ b/editeng/Module_editeng.mk
@@ -20,6 +20,7 @@
 $(eval $(call gb_Module_Module,editeng))
 
 $(eval $(call gb_Module_add_targets,editeng,\
+        CustomTarget_generated \
 	Library_editeng \
 ))
 
diff --git a/editeng/source/misc/SvXMLAutoCorrectTokenHandler.cxx b/editeng/source/misc/SvXMLAutoCorrectTokenHandler.cxx
index 81982a6..f669291 100644
--- a/editeng/source/misc/SvXMLAutoCorrectTokenHandler.cxx
+++ b/editeng/source/misc/SvXMLAutoCorrectTokenHandler.cxx
@@ -10,6 +10,7 @@
 #include <SvXMLAutoCorrectTokenHandler.hxx>
 #include <xmloff/xmltoken.hxx>
 #include <com/sun/star/xml/sax/FastToken.hpp>
+#include <tokens.cxx>
 
 using namespace ::css::uno;
 using namespace ::xmloff::token;
@@ -25,14 +26,7 @@ SvXMLAutoCorrectTokenHandler::~SvXMLAutoCorrectTokenHandler()
 sal_Int32 SAL_CALL SvXMLAutoCorrectTokenHandler::getTokenFromUTF8( const Sequence< sal_Int8 >& Identifier )
      throw (::css::uno::RuntimeException, std::exception)
 {
-    switch( Identifier.getLength() )
-    {
-        case 4:     return XML_NAME;
-        case 5:     return XML_BLOCK;
-        case 10:    return XML_BLOCK_LIST;
-        case 16:    return XML_ABBREVIATED_NAME;
-        default:    return css::xml::sax::FastToken::DONTKNOW;
-    }
+    return getTokenDirect( reinterpret_cast< const char* >( Identifier.getConstArray() ), Identifier.getLength() );
 }
 
 Sequence< sal_Int8 > SAL_CALL SvXMLAutoCorrectTokenHandler::getUTF8Identifier( sal_Int32 )
@@ -41,4 +35,12 @@ Sequence< sal_Int8 > SAL_CALL SvXMLAutoCorrectTokenHandler::getUTF8Identifier( s
     return Sequence< sal_Int8 >();
 }
 
+sal_Int32 SvXMLAutoCorrectTokenHandler::getTokenDirect( const char *pTag, sal_Int32 nLength ) const
+{
+    // Look pTag up in the generated perfect hash; a length of 0 means "measure pTag with strlen".
+    const sal_Int32 nTagLength = nLength ? nLength : static_cast< sal_Int32 >( strlen( pTag ) );
+    const struct xmltoken* pEntry = Perfect_Hash::in_word_set( pTag, nTagLength );
+    return pEntry ? pEntry->nToken : XML_TOKEN_INVALID;
+}
+
 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/editeng/source/misc/SvXMLAutoCorrectTokenHandler.hxx b/editeng/source/misc/SvXMLAutoCorrectTokenHandler.hxx
index dfe8762..eaf15d6 100644
--- a/editeng/source/misc/SvXMLAutoCorrectTokenHandler.hxx
+++ b/editeng/source/misc/SvXMLAutoCorrectTokenHandler.hxx
@@ -16,6 +16,7 @@
 #include <cppuhelper/implbase1.hxx>
 #include <com/sun/star/xml/sax/XFastTokenHandler.hpp>
 #include <com/sun/star/xml/sax/FastToken.hpp>
+#include <sax/fastattribs.hxx>
 
 using namespace ::css::xml::sax;
 using namespace ::xmloff::token;
@@ -30,10 +31,11 @@ enum SvXMLAutoCorrectToken : sal_Int32
 };
 
 class SvXMLAutoCorrectTokenHandler : public
-        cppu::WeakImplHelper1< css::xml::sax::XFastTokenHandler >
+        cppu::WeakImplHelper1< css::xml::sax::XFastTokenHandler >,
+        public sax_fastparser::FastTokenHandlerBase
 {
 public:
-    SvXMLAutoCorrectTokenHandler();
+    explicit SvXMLAutoCorrectTokenHandler();
     virtual ~SvXMLAutoCorrectTokenHandler();
 
     //XFastTokenHandler
@@ -41,6 +43,9 @@ public:
         throw (css::uno::RuntimeException, std::exception) SAL_OVERRIDE;
     virtual css::uno::Sequence< sal_Int8 > SAL_CALL getUTF8Identifier( sal_Int32 Token )
         throw (css::uno::RuntimeException, std::exception) SAL_OVERRIDE;
+
+    // Much faster direct C++ shortcut to the method that matters
+    virtual sal_Int32 getTokenDirect( const char *pToken, sal_Int32 nLength ) const SAL_OVERRIDE;
 };
 
 #endif // EDITENG_SOURCE_MISC_SVXMLAUTOCORRECTTOKENHANDLER_HXX
diff --git a/editeng/source/misc/tokens.txt b/editeng/source/misc/tokens.txt
new file mode 100644
index 0000000..0b5a646
--- /dev/null
+++ b/editeng/source/misc/tokens.txt
@@ -0,0 +1,7 @@
+abbreviated-name
+block
+block-list
+list-name
+name
+package-name
+unformatted-text
diff --git a/solenv/bin/gentoken.pl b/solenv/bin/gentoken.pl
new file mode 100644
index 0000000..f742e65
--- /dev/null
+++ b/solenv/bin/gentoken.pl
@@ -0,0 +1,65 @@
+#
+# This file is part of the LibreOffice project.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+# This file incorporates work covered by the following license notice:
+#
+#   Licensed to the Apache Software Foundation (ASF) under one or more
+#   contributor license agreements. See the NOTICE file distributed
+#   with this work for additional information regarding copyright
+#   ownership. The ASF licenses this file to you under the Apache
+#   License, Version 2.0 (the "License"); you may not use this file
+#   except in compliance with the License. You may obtain a copy of
+#   the License at http://www.apache.org/licenses/LICENSE-2.0 .
+#
+
+use strict;
+use warnings;
+
+# Usage: gentoken.pl <tokens.txt> <tokens.gperf>
+# Each input line is "<name> [<identifier>]"; the output is gperf input mapping every name to its identifier.
+die "usage: $0 <tokens.txt> <tokens.gperf>\n" if @ARGV < 2;
+my ( $tokens_path, $gperf_path ) = @ARGV;
+
+open ( my $tokens_fh, '<', $tokens_path ) or die "can't open token file: $!";
+my %tokens;
+
+while ( defined ( my $line = <$tokens_fh> ) )
+{
+    next if $line =~ /^#/;                          # comment line
+    my ( $name, $ident ) = split( ' ', $line );     # ' ' also drops leading blanks and the newline
+    next if not defined ($name);                    # blank line: no token to register
+    if ( not defined ($ident) )
+    {
+        # No explicit identifier given: derive XML_<name>, mapping '-', '.', ':' to '_' and '+' to PLUS.
+        $ident = "XML_".$name;
+        $ident =~ tr/\-\.\:/___/;
+        $ident =~ s/\+/PLUS/g;
+    }
+    $tokens{$name} = uc($ident);
+}
+close ( $tokens_fh );
+
+open ( my $gperf_fh, '>', $gperf_path ) or die "can't open tokens.gperf file: $!";
+
+print {$gperf_fh} "%language=C++\n";
+print {$gperf_fh} "%global-table\n";
+print {$gperf_fh} "%null-strings\n";
+print {$gperf_fh} "%struct-type\n";
+print {$gperf_fh} "struct xmltoken\n";
+print {$gperf_fh} "{\n";
+print {$gperf_fh} "  const sal_Char *name; XMLTokenEnum nToken; \n";
+print {$gperf_fh} "};\n";
+print {$gperf_fh} "%%\n";
+
+foreach my $name ( sort(keys(%tokens)) )
+{
+    print {$gperf_fh} "$name,$tokens{$name}\n";
+}
+print {$gperf_fh} "%%\n";
+close ( $gperf_fh ) or die "can't write tokens.gperf file: $!";     # buffered write errors surface at close
+
+# vim: set noet sw=4 ts=4:


More information about the Libreoffice-commits mailing list