[Libreoffice-commits] core.git: editeng/Library_editeng.mk editeng/qa editeng/source include/editeng sw/inc sw/source

Tomaž Vajngerl quikee at gmail.com
Tue Jun 11 14:17:22 PDT 2013


 editeng/Library_editeng.mk                |    1 
 editeng/qa/lookuptree/lookuptree_test.cxx |   95 ++++++++++++++-
 editeng/source/lookuptree/Trie.cxx        |  183 ++++++++++++++++++++++++++++++
 include/editeng/Trie.hxx                  |   59 +++++++++
 sw/inc/acmplwrd.hxx                       |    4 
 sw/source/core/doc/acmplwrd.cxx           |   18 +-
 6 files changed, 346 insertions(+), 14 deletions(-)

New commits:
commit af3916f8ee5dbd5ccb3b8faca940b57ff2102d76
Author: Tomaž Vajngerl <quikee at gmail.com>
Date:   Tue Jun 11 23:13:14 2013 +0200

    fdo#55315 Added simple Trie lookup tree for autocomplete words storage
    
    Added simple Trie lookup tree which is more tailored to what is needed
    in autocomplete implementation, but still has the speed of the
    LatinLookupTree that has been used till now. As the implementation
    is much simpler it should be more managable and easier fixable.
    
    For now two actions: insert (word) and findSuggestions are supported.
    Acttion findSuggestion returns all words in a list for a searched
    sub-word, it also fixes fdo#62945.
    
    Change-Id: I63b69c30d28b4e1c465c2122ebc537f7f75a033a

diff --git a/editeng/Library_editeng.mk b/editeng/Library_editeng.mk
index f352113..e9c5b4b 100644
--- a/editeng/Library_editeng.mk
+++ b/editeng/Library_editeng.mk
@@ -116,6 +116,7 @@ $(eval $(call gb_Library_add_exception_objects,editeng,\
     editeng/source/lookuptree/LatinLookupTree \
     editeng/source/lookuptree/LatinTreeNode \
     editeng/source/lookuptree/Node \
+    editeng/source/lookuptree/Trie \
 ))
 
 # add libraries to be linked to editeng; again these names need to be given as
diff --git a/editeng/qa/lookuptree/lookuptree_test.cxx b/editeng/qa/lookuptree/lookuptree_test.cxx
index c8ee1ab..4722bb1 100644
--- a/editeng/qa/lookuptree/lookuptree_test.cxx
+++ b/editeng/qa/lookuptree/lookuptree_test.cxx
@@ -25,21 +25,25 @@
 #include <editeng/LookupTree.hxx>
 #include <editeng/LatinLookupTree.hxx>
 
+#include <editeng/Trie.hxx>
+
 namespace {
 
 class LookupTreeTest : public CppUnit::TestFixture
 {
-    public:
-        void test();
+public:
+    void testLookupTree();
+    void testTrie();
 
     CPPUNIT_TEST_SUITE(LookupTreeTest);
-    CPPUNIT_TEST(test);
+    CPPUNIT_TEST(testLookupTree);
+    CPPUNIT_TEST(testTrie);
     CPPUNIT_TEST_SUITE_END();
 };
 
 CPPUNIT_TEST_SUITE_REGISTRATION(LookupTreeTest);
 
-void LookupTreeTest::test()
+void LookupTreeTest::testLookupTree()
 {
     LookupTree* a = new LatinLookupTree( "a" );
 
@@ -218,6 +222,89 @@ void LookupTreeTest::test()
     delete a;
 }
 
+void LookupTreeTest::testTrie()
+{
+    editeng::Trie trie;
+    std::vector<OUString> suggestions;
+
+    trie.findSuggestions( OUString(""), suggestions);
+    CPPUNIT_ASSERT_EQUAL( (size_t) 0, suggestions.size() );
+
+    trie.insert( OUString("") );
+    trie.findSuggestions( OUString(""), suggestions);
+    CPPUNIT_ASSERT_EQUAL( (size_t) 0, suggestions.size() );
+
+    trie.findSuggestions( OUString("a"), suggestions);
+    CPPUNIT_ASSERT_EQUAL( (size_t) 0, suggestions.size() );
+
+    trie.insert( OUString("abc") );
+    trie.insert( OUString("abcdefghijklmnopqrstuvwxyz") );
+    trie.findSuggestions( OUString("a"), suggestions);
+    CPPUNIT_ASSERT_EQUAL( (size_t) 2, suggestions.size() );
+    CPPUNIT_ASSERT_EQUAL( OUString("abc"), suggestions[0] );
+    CPPUNIT_ASSERT_EQUAL( OUString("abcdefghijklmnopqrstuvwxyz"), suggestions[1] );
+    suggestions.clear();
+
+    trie.findSuggestions( OUString("abc"), suggestions);
+    CPPUNIT_ASSERT_EQUAL( (size_t) 1, suggestions.size() );
+    CPPUNIT_ASSERT_EQUAL( OUString("abcdefghijklmnopqrstuvwxyz"), suggestions[0] );
+    suggestions.clear();
+
+    trie.findSuggestions( OUString("abe"), suggestions);
+    CPPUNIT_ASSERT_EQUAL( (size_t) 0, suggestions.size() );
+    suggestions.clear();
+
+    trie.insert( OUString("abe") );
+    trie.findSuggestions( OUString(""), suggestions);
+    CPPUNIT_ASSERT_EQUAL( (size_t) 3, suggestions.size() );
+    CPPUNIT_ASSERT_EQUAL( OUString("abc"), suggestions[0] );
+    CPPUNIT_ASSERT_EQUAL( OUString("abcdefghijklmnopqrstuvwxyz"), suggestions[1] );
+    CPPUNIT_ASSERT_EQUAL( OUString("abe"), suggestions[2] );
+    suggestions.clear();
+
+    trie.insert( OUString("H31l0") );
+    trie.findSuggestions( OUString("H"), suggestions);
+
+    CPPUNIT_ASSERT_EQUAL( (size_t) 1, suggestions.size() );
+    CPPUNIT_ASSERT_EQUAL( OUString("H31l0"), suggestions[0] );
+    suggestions.clear();
+
+    trie.insert( OUString("H1") );
+    trie.findSuggestions( OUString("H"), suggestions);
+    CPPUNIT_ASSERT_EQUAL( (size_t) 2, suggestions.size() );
+    CPPUNIT_ASSERT_EQUAL( OUString("H31l0"), suggestions[0] );
+    CPPUNIT_ASSERT_EQUAL( OUString("H1"), suggestions[1] );
+    suggestions.clear();
+
+    trie.findSuggestions( OUString("H3"), suggestions);
+    CPPUNIT_ASSERT_EQUAL( (size_t) 1, suggestions.size() );
+    CPPUNIT_ASSERT_EQUAL( OUString("H31l0"), suggestions[0] );
+    suggestions.clear();
+
+    trie.insert( OStringToOUString( "H\xC3\xA4llo", RTL_TEXTENCODING_UTF8 ) );
+    trie.findSuggestions( OUString("H"), suggestions );
+    CPPUNIT_ASSERT_EQUAL( (size_t) 3, suggestions.size() );
+    CPPUNIT_ASSERT_EQUAL( OUString("H31l0"), suggestions[0] );
+    CPPUNIT_ASSERT_EQUAL( OUString("H1"), suggestions[1] );
+    CPPUNIT_ASSERT_EQUAL( OStringToOUString( "H\xC3\xA4llo", RTL_TEXTENCODING_UTF8 ), suggestions[2] );
+    suggestions.clear();
+
+    trie.findSuggestions( OUString("H3"), suggestions );
+    CPPUNIT_ASSERT_EQUAL( (size_t) 1, suggestions.size() );
+    CPPUNIT_ASSERT_EQUAL( OUString("H31l0"), suggestions[0] );
+    suggestions.clear();
+
+    trie.findSuggestions( OStringToOUString("H\xC3\xA4", RTL_TEXTENCODING_UTF8), suggestions );
+    CPPUNIT_ASSERT_EQUAL( (size_t) 1, suggestions.size() );
+    CPPUNIT_ASSERT_EQUAL( OStringToOUString("H\xC3\xA4llo", RTL_TEXTENCODING_UTF8), suggestions[0] );
+    suggestions.clear();
+
+    trie.findSuggestions( OUString(""), suggestions);
+    CPPUNIT_ASSERT_EQUAL( (size_t) 6, suggestions.size() );
+    suggestions.clear();
+
+}
+
 } // namespace end
 
 CPPUNIT_PLUGIN_IMPLEMENT();
diff --git a/editeng/source/lookuptree/Trie.cxx b/editeng/source/lookuptree/Trie.cxx
new file mode 100644
index 0000000..30ee0be
--- /dev/null
+++ b/editeng/source/lookuptree/Trie.cxx
@@ -0,0 +1,183 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <editeng/Trie.hxx>
+
+namespace editeng
+{
+
+using namespace std;
+
+/* TrieNode */
+
+TrieNode::TrieNode(sal_Unicode aCharacter) :
+    mCharacter(aCharacter),
+    mMarker(false)
+{
+    for (int i=0; i<LATIN_ARRAY_SIZE; i++)
+    {
+        mLatinArray[i] = NULL;
+    }
+}
+
+TrieNode::~TrieNode()
+{
+    vector<TrieNode*>::iterator iNode;
+    for(iNode = mChildren.begin(); iNode != mChildren.end(); iNode++)
+    {
+        delete *iNode;
+    }
+
+    for (int i=0; i<LATIN_ARRAY_SIZE; i++)
+    {
+        delete mLatinArray[i];
+    }
+}
+
+void TrieNode::markWord()
+{
+    mMarker = true;
+}
+
+void TrieNode::addNewChild(TrieNode* pChild)
+{
+    if (pChild->mCharacter >= sal_Unicode('a') &&
+        pChild->mCharacter <= sal_Unicode('z'))
+    {
+        mLatinArray[pChild->mCharacter - sal_Unicode('a')] = pChild;
+    }
+    else
+    {
+        mChildren.push_back(pChild);
+    }
+}
+
+TrieNode* TrieNode::findChild(sal_Unicode aInputCharacter)
+{
+    if (aInputCharacter >= sal_Unicode('a') &&
+        aInputCharacter <= sal_Unicode('z'))
+    {
+        return mLatinArray[aInputCharacter - sal_Unicode('a')];
+    }
+
+    vector<TrieNode*>::iterator iNode;
+
+    for(iNode = mChildren.begin(); iNode != mChildren.end(); iNode++)
+    {
+        TrieNode* pCurrent = *iNode;
+        if ( pCurrent->mCharacter == aInputCharacter )
+            return pCurrent;
+    }
+
+    return NULL;
+}
+
+void TrieNode::collectSuggestions(OUString sPath, vector<OUString>& rSuggestionList)
+{
+    // first traverse nodes for alphabet characters
+    for (int i=0; i<LATIN_ARRAY_SIZE; i++)
+    {
+        TrieNode* pCurrent = mLatinArray[i];
+        if (pCurrent != NULL)
+        {
+            OUString aStringPath = sPath + OUString(pCurrent->mCharacter);
+            if(pCurrent->mMarker)
+                rSuggestionList.push_back(aStringPath);
+            // recursivly traverse tree
+            pCurrent->collectSuggestions(aStringPath, rSuggestionList);
+        }
+    }
+
+    // traverse nodes for other characters
+    vector<TrieNode*>::iterator iNode;
+    for(iNode = mChildren.begin(); iNode != mChildren.end(); iNode++)
+    {
+        TrieNode* pCurrent = *iNode;
+        if (pCurrent != NULL)
+        {
+            OUString aStringPath = sPath + OUString(pCurrent->mCharacter);
+            if(pCurrent->mMarker)
+                rSuggestionList.push_back(aStringPath);
+            // recursivly traverse tree
+            pCurrent->collectSuggestions(aStringPath, rSuggestionList);
+        }
+    }
+}
+
+TrieNode* TrieNode::traversePath(OUString sPath)
+{
+    TrieNode* pCurrent = this;
+
+    for ( sal_Int32 i = 0; i < sPath.getLength(); i++ )
+    {
+        sal_Unicode aCurrentChar = sPath[i];
+        pCurrent = pCurrent->findChild(aCurrentChar);
+        if ( pCurrent == NULL )
+            return NULL;
+    }
+
+    return pCurrent;
+}
+
+/* TRIE */
+
+Trie::Trie()
+{
+    mRoot = new TrieNode();
+}
+
+Trie::~Trie()
+{
+    delete mRoot;
+}
+
+void Trie::insert(OUString sInputString) const
+{
+    // adding an empty word is not allowed
+    if ( sInputString.isEmpty() )
+    {
+        return;
+    }
+
+    // traverse the input string and modify the tree with new nodes / characters
+
+    TrieNode* pCurrent = mRoot;
+    sal_Unicode aCurrentChar;
+
+    for ( sal_Int32 i = 0; i < sInputString.getLength(); i++ )
+    {
+        aCurrentChar = sInputString[i];
+        TrieNode* pChild = pCurrent->findChild(aCurrentChar);
+        if ( pChild == NULL )
+        {
+            TrieNode* pNewNode = new TrieNode(aCurrentChar);
+            pCurrent->addNewChild(pNewNode);
+            pCurrent = pNewNode;
+        }
+        else
+        {
+            pCurrent = pChild;
+        }
+    }
+
+    pCurrent->markWord();
+}
+
+void Trie::findSuggestions(OUString sWordPart, vector<OUString>& rSuggesstionList) const
+{
+    TrieNode* pNode = mRoot->traversePath(sWordPart);
+
+    if (pNode != NULL)
+    {
+        pNode->collectSuggestions(sWordPart, rSuggesstionList);
+    }
+}
+
+}
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/editeng/Trie.hxx b/include/editeng/Trie.hxx
new file mode 100644
index 0000000..2ac76ae
--- /dev/null
+++ b/include/editeng/Trie.hxx
@@ -0,0 +1,59 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef TRIE_HXX
+#define TRIE_HXX
+
+#include <sal/types.h>
+#include <rtl/ustring.hxx>
+#include <vector>
+#include <editeng/editengdllapi.h>
+
+namespace editeng
+{
+
+struct TrieNode
+{
+    static const int LATIN_ARRAY_SIZE = 26;
+
+    sal_Unicode             mCharacter;
+    bool                    mMarker;
+    std::vector<TrieNode*>  mChildren;
+    TrieNode*               mLatinArray[LATIN_ARRAY_SIZE];
+
+
+    TrieNode(sal_Unicode aCharacter = '\0');
+    virtual ~TrieNode();
+
+    void      markWord();
+    TrieNode* findChild(sal_Unicode aCharacter);
+    TrieNode* traversePath(OUString sPath);
+    void      addNewChild(TrieNode* pChild);
+    void      collectSuggestions(OUString sPath, std::vector<OUString>& rSuggestionList);
+};
+
+class EDITENG_DLLPUBLIC Trie
+{
+private:
+    TrieNode* mRoot;
+
+public:
+    Trie();
+    virtual ~Trie();
+
+    void insert(OUString sInputString) const;
+    void findSuggestions(OUString sWordPart, std::vector<OUString>& rSuggesstionList) const;
+
+};
+
+}
+
+#endif // TRIE_HXX
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sw/inc/acmplwrd.hxx b/sw/inc/acmplwrd.hxx
index 7b2f40b..7be3b3f 100644
--- a/sw/inc/acmplwrd.hxx
+++ b/sw/inc/acmplwrd.hxx
@@ -23,7 +23,7 @@
 #include <deque>
 
 #include <editeng/swafopt.hxx>
-#include <editeng/LatinLookupTree.hxx>
+#include <editeng/Trie.hxx>
 
 class SwDoc;
 class SwAutoCompleteWord_Impl;
@@ -38,7 +38,7 @@ class SwAutoCompleteWord
 
     /// contains extended strings carrying source information
     editeng::SortedAutoCompleteStrings m_WordList;
-    LookupTree* m_LookupTree;
+    editeng::Trie m_LookupTree;
     SwAutoCompleteStringPtrDeque aLRULst;
 
     SwAutoCompleteWord_Impl* pImpl;
diff --git a/sw/source/core/doc/acmplwrd.cxx b/sw/source/core/doc/acmplwrd.cxx
index 2c865b4..9ec7606 100644
--- a/sw/source/core/doc/acmplwrd.cxx
+++ b/sw/source/core/doc/acmplwrd.cxx
@@ -221,13 +221,11 @@ SwAutoCompleteWord::SwAutoCompleteWord( sal_uInt16 nWords, sal_uInt16 nMWrdLen )
     nMinWrdLen( nMWrdLen ),
     bLockWordLst( sal_False )
 {
-    m_LookupTree = new LatinLookupTree(OUString("default"));
 }
 
 SwAutoCompleteWord::~SwAutoCompleteWord()
 {
     m_WordList.DeleteAndDestroyAll(); // so the assertion below works
-    delete m_LookupTree;
     delete pImpl;
 #if OSL_DEBUG_LEVEL > 0
     sal_uLong nStrings = SwAutoCompleteString::GetElementCount();
@@ -265,7 +263,7 @@ bool SwAutoCompleteWord::InsertWord( const String& rWord, SwDoc& rDoc )
         std::pair<editeng::SortedAutoCompleteStrings::const_iterator, bool>
             aInsPair = m_WordList.insert(pNew);
 
-        m_LookupTree->insert( OUString(aNewWord).copy(0, nWrdLen) );
+        m_LookupTree.insert( OUString(aNewWord).copy(0, nWrdLen) );
 
         if (aInsPair.second)
         {
@@ -355,15 +353,19 @@ bool SwAutoCompleteWord::GetWordsMatching(String aMatch, std::vector<String>& aW
 {
     OUString aStringRoot = OUString( aMatch );
 
-    m_LookupTree->gotoNode( aStringRoot );
-    OUString aAutocompleteWord = m_LookupTree->suggestAutoCompletion();
-    if (aAutocompleteWord.isEmpty())
+    std::vector<OUString> suggestions;
+    m_LookupTree.findSuggestions(aStringRoot, suggestions);
+
+    if (suggestions.empty())
     {
         return false;
     }
 
-    OUString aCompleteWord = aStringRoot + aAutocompleteWord;
-    aWords.push_back( String(aCompleteWord) );
+    for (size_t i = 0; i < suggestions.size(); i++)
+    {
+        aWords.push_back( String(suggestions[i]) );
+    }
+
     return true;
 }
 


More information about the Libreoffice-commits mailing list