[Libreoffice-commits] core.git: cui/source editeng/source include/editeng
Noel Grandin (via logerrit)
logerrit at kemper.freedesktop.org
Fri Sep 20 08:34:30 UTC 2019
cui/source/tabpages/autocdlg.cxx | 16 +-
editeng/source/misc/SvXMLAutoCorrectExport.cxx | 8 -
editeng/source/misc/svxacorr.cxx | 143 +++++++++++++------------
include/editeng/svxacorr.hxx | 10 +
4 files changed, 96 insertions(+), 81 deletions(-)
New commits:
commit 77dec7588c9141b03f8ec0139eb96c298b26f261
Author: Noel Grandin <noel.grandin at collabora.co.uk>
AuthorDate: Thu Sep 19 12:29:42 2019 +0200
Commit: Noel Grandin <noel.grandin at collabora.co.uk>
CommitDate: Fri Sep 20 10:33:24 2019 +0200
tdf#109158 improve sorting when loading large autocorrect file
reduces time from 2.0s to 1.7s
reduce work by
(*) reserving some arrays
(*) pre-sorting with a cheaper comparator
(*) don't copy when returning result, just return a const&
(*) flattening the data-structures to reduce pointer-chasing
Change-Id: I972bd7ffdbf2121c2d38c24aca9618ca708e920c
Reviewed-on: https://gerrit.libreoffice.org/79119
Tested-by: Jenkins
Reviewed-by: Noel Grandin <noel.grandin at collabora.co.uk>
diff --git a/cui/source/tabpages/autocdlg.cxx b/cui/source/tabpages/autocdlg.cxx
index d2ab9b0013da..ca22e820f569 100644
--- a/cui/source/tabpages/autocdlg.cxx
+++ b/cui/source/tabpages/autocdlg.cxx
@@ -742,12 +742,14 @@ bool OfaAutocorrReplacePage::FillItemSet( SfxItemSet* )
std::vector<SvxAutocorrWord> aDeleteWords;
std::vector<SvxAutocorrWord> aNewWords;
+ aDeleteWords.reserve( rStringChangeList.aDeletedEntries.size() );
for (const DoubleString & deleteEntry : rStringChangeList.aDeletedEntries)
{
SvxAutocorrWord aDeleteWord( deleteEntry.sShort, deleteEntry.sLong );
aDeleteWords.push_back( aDeleteWord );
}
+ aNewWords.reserve( rStringChangeList.aNewEntries.size() );
for (const DoubleString & newEntry : rStringChangeList.aNewEntries)
{
//fdo#67697 if the user data is set then we want to retain the
@@ -834,10 +836,10 @@ void OfaAutocorrReplacePage::RefillReplaceBox(bool bFromReset,
{
SvxAutoCorrect* pAutoCorrect = SvxAutoCorrCfg::Get().GetAutoCorrect();
SvxAutocorrWordList* pWordList = pAutoCorrect->LoadAutocorrWordList(eLang);
- SvxAutocorrWordList::Content aContent = pWordList->getSortedContent();
- m_xReplaceTLB->bulk_insert_for_each(aContent.size(), [this, &aContent](weld::TreeIter& rIter, int nIndex) {
- auto const& elem = aContent[nIndex];
- bool bTextOnly = elem->IsTextOnly();
+ const SvxAutocorrWordList::AutocorrWordSetType & rContent = pWordList->getSortedContent();
+ m_xReplaceTLB->bulk_insert_for_each(rContent.size(), [this, rContent](weld::TreeIter& rIter, int nIndex) {
+ auto const& elem = rContent[nIndex];
+ bool bTextOnly = elem.IsTextOnly();
// formatted text is only in Writer
if (bSWriter || bTextOnly)
{
@@ -847,12 +849,12 @@ void OfaAutocorrReplacePage::RefillReplaceBox(bool bFromReset,
OUString sId = OUString::number(reinterpret_cast<sal_Int64>(m_xTextOnlyCB.get()));
m_xReplaceTLB->set_id(rIter, sId);
}
- m_xReplaceTLB->set_text(rIter, elem->GetShort(), 0);
- m_xReplaceTLB->set_text(rIter, elem->GetLong(), 1);
+ m_xReplaceTLB->set_text(rIter, elem.GetShort(), 0);
+ m_xReplaceTLB->set_text(rIter, elem.GetLong(), 1);
}
else
{
- aFormatText.insert(elem->GetShort());
+ aFormatText.insert(elem.GetShort());
}
}, &m_aReplaceFixedWidths);
m_xNewReplacePB->set_sensitive(false);
diff --git a/editeng/source/misc/SvXMLAutoCorrectExport.cxx b/editeng/source/misc/SvXMLAutoCorrectExport.cxx
index 8ebefccbe17f..4dadeca36fe3 100644
--- a/editeng/source/misc/SvXMLAutoCorrectExport.cxx
+++ b/editeng/source/misc/SvXMLAutoCorrectExport.cxx
@@ -51,15 +51,15 @@ ErrCode SvXMLAutoCorrectExport::exportDoc(enum XMLTokenEnum /*eClass*/)
GetNamespaceMap_().GetNameByKey ( XML_NAMESPACE_BLOCKLIST ) );
{
SvXMLElementExport aRoot (*this, XML_NAMESPACE_BLOCKLIST, XML_BLOCK_LIST, true, true);
- SvxAutocorrWordList::Content aContent = pAutocorr_List->getSortedContent();
- for (auto const& content : aContent)
+ const SvxAutocorrWordList::AutocorrWordSetType& rContent = pAutocorr_List->getSortedContent();
+ for (auto const& content : rContent)
{
AddAttribute( XML_NAMESPACE_BLOCKLIST,
XML_ABBREVIATED_NAME,
- content->GetShort());
+ content.GetShort());
AddAttribute( XML_NAMESPACE_BLOCKLIST,
XML_NAME,
- content->IsTextOnly() ? content->GetLong() : content->GetShort());
+ content.IsTextOnly() ? content.GetLong() : content.GetShort());
SvXMLElementExport aBlock( *this, XML_NAMESPACE_BLOCKLIST, XML_BLOCK, true, true);
}
diff --git a/editeng/source/misc/svxacorr.cxx b/editeng/source/misc/svxacorr.cxx
index 6b9e6df1111c..96d7c1561f27 100644
--- a/editeng/source/misc/svxacorr.cxx
+++ b/editeng/source/misc/svxacorr.cxx
@@ -2488,10 +2488,10 @@ bool SvxAutoCorrectLanguageLists::MakeCombinedChanges( std::vector<SvxAutocorrWo
{
for (SvxAutocorrWord & aWordToDelete : aDeleteEntries)
{
- std::unique_ptr<SvxAutocorrWord> pFoundEntry = pAutocorr_List->FindAndRemove( &aWordToDelete );
- if( pFoundEntry )
+ boost::optional<SvxAutocorrWord> xFoundEntry = pAutocorr_List->FindAndRemove( &aWordToDelete );
+ if( xFoundEntry )
{
- if( !pFoundEntry->IsTextOnly() )
+ if( !xFoundEntry->IsTextOnly() )
{
OUString aName( aWordToDelete.GetShort() );
if (xStorage->IsOLEStorage())
@@ -2510,24 +2510,24 @@ bool SvxAutoCorrectLanguageLists::MakeCombinedChanges( std::vector<SvxAutocorrWo
for (SvxAutocorrWord & aNewEntrie : aNewEntries)
{
- std::unique_ptr<SvxAutocorrWord> pWordToAdd(new SvxAutocorrWord( aNewEntrie.GetShort(), aNewEntrie.GetLong(), true ));
- std::unique_ptr<SvxAutocorrWord> pRemoved = pAutocorr_List->FindAndRemove( pWordToAdd.get() );
- if( pRemoved )
+ SvxAutocorrWord aWordToAdd(aNewEntrie.GetShort(), aNewEntrie.GetLong(), true );
+ boost::optional<SvxAutocorrWord> xRemoved = pAutocorr_List->FindAndRemove( &aWordToAdd );
+ if( xRemoved )
{
- if( !pRemoved->IsTextOnly() )
+ if( !xRemoved->IsTextOnly() )
{
// Still have to remove the Storage
- OUString sStorageName( pWordToAdd->GetShort() );
+ OUString sStorageName( aWordToAdd.GetShort() );
if (xStorage->IsOLEStorage())
sStorageName = EncryptBlockName_Imp(sStorageName);
else
- GeneratePackageName ( pWordToAdd->GetShort(), sStorageName);
+ GeneratePackageName ( aWordToAdd.GetShort(), sStorageName);
if( xStorage->IsContained( sStorageName ) )
xStorage->Remove( sStorageName );
}
}
- bRet = pAutocorr_List->Insert( std::move(pWordToAdd) );
+ bRet = pAutocorr_List->Insert( std::move(aWordToAdd) );
if ( !bRet )
{
@@ -2556,11 +2556,11 @@ bool SvxAutoCorrectLanguageLists::PutText( const OUString& rShort, const OUStrin
// Update the word list
if( bRet )
{
- std::unique_ptr<SvxAutocorrWord> pNew(new SvxAutocorrWord( rShort, rLong, true ));
- std::unique_ptr<SvxAutocorrWord> pRemove = pAutocorr_List->FindAndRemove( pNew.get() );
- if( pRemove )
+ SvxAutocorrWord aNew(rShort, rLong, true );
+ boost::optional<SvxAutocorrWord> xRemove = pAutocorr_List->FindAndRemove( &aNew );
+ if( xRemove )
{
- if( !pRemove->IsTextOnly() )
+ if( !xRemove->IsTextOnly() )
{
// Still have to remove the Storage
OUString sStgNm( rShort );
@@ -2574,7 +2574,7 @@ bool SvxAutoCorrectLanguageLists::PutText( const OUString& rShort, const OUStrin
}
}
- if( pAutocorr_List->Insert( std::move(pNew) ) )
+ if( pAutocorr_List->Insert( std::move(aNew) ) )
{
bRet = MakeBlocklist_Imp( *xStg );
xStg = nullptr;
@@ -2605,8 +2605,7 @@ void SvxAutoCorrectLanguageLists::PutText( const OUString& rShort,
// Update the word list
if( bRet )
{
- std::unique_ptr<SvxAutocorrWord> pNew(new SvxAutocorrWord( rShort, sLong, false ));
- if( pAutocorr_List->Insert( std::move(pNew) ) )
+ if( pAutocorr_List->Insert( SvxAutocorrWord(rShort, sLong, false) ) )
{
tools::SvRef<SotStorage> xStor = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
MakeBlocklist_Imp( *xStor );
@@ -2619,20 +2618,18 @@ void SvxAutoCorrectLanguageLists::PutText( const OUString& rShort,
}
// Keep the list sorted ...
-struct CompareSvxAutocorrWordList
+struct SvxAutocorrWordList::CompareSvxAutocorrWordList
{
- bool operator()( SvxAutocorrWord* const& lhs, SvxAutocorrWord* const& rhs ) const
+ bool operator()( SvxAutocorrWord const & lhs, SvxAutocorrWord const & rhs ) const
{
CollatorWrapper& rCmp = ::GetCollatorWrapper();
- return rCmp.compareString( lhs->GetShort(), rhs->GetShort() ) < 0;
+ return rCmp.compareString( lhs.GetShort(), rhs.GetShort() ) < 0;
}
};
namespace {
-// can't use std::unique_ptr until we have C++14
-typedef std::set<SvxAutocorrWord*, CompareSvxAutocorrWordList> AutocorrWordSetType;
-typedef std::unordered_map<OUString, std::unique_ptr<SvxAutocorrWord>> AutocorrWordHashType;
+typedef std::unordered_map<OUString, SvxAutocorrWord> AutocorrWordHashType;
}
@@ -2640,16 +2637,14 @@ struct SvxAutocorrWordList::Impl
{
// only one of these contains the data
- mutable AutocorrWordSetType maSet;
+ // maSortedVector is manually sorted so we can optimise data movement
+ mutable AutocorrWordSetType maSortedVector;
mutable AutocorrWordHashType maHash; // key is 'Short'
void DeleteAndDestroyAll()
{
maHash.clear();
-
- for (auto const& elem : maSet)
- delete elem;
- maSet.clear();
+ maSortedVector.clear();
}
};
@@ -2657,7 +2652,6 @@ SvxAutocorrWordList::SvxAutocorrWordList() : mpImpl(new Impl) {}
SvxAutocorrWordList::~SvxAutocorrWordList()
{
- mpImpl->DeleteAndDestroyAll();
}
void SvxAutocorrWordList::DeleteAndDestroyAll()
@@ -2666,70 +2660,89 @@ void SvxAutocorrWordList::DeleteAndDestroyAll()
}
// returns true if inserted
-bool SvxAutocorrWordList::Insert(std::unique_ptr<SvxAutocorrWord> pWord) const
+const SvxAutocorrWord* SvxAutocorrWordList::Insert(SvxAutocorrWord aWord) const
{
- if ( mpImpl->maSet.empty() ) // use the hash
+ if ( mpImpl->maSortedVector.empty() ) // use the hash
{
- OUString aShort( pWord->GetShort() );
- return mpImpl->maHash.insert( std::pair<OUString, std::unique_ptr<SvxAutocorrWord> >( aShort, std::move(pWord) ) ).second;
+ OUString aShort = aWord.GetShort();
+ auto [it,inserted] = mpImpl->maHash.emplace( std::move(aShort), std::move(aWord) );
+ if (inserted)
+ return &(it->second);
+ return nullptr;
}
else
- return mpImpl->maSet.insert( pWord.release() ).second;
+ {
+ auto it = std::lower_bound(mpImpl->maSortedVector.begin(), mpImpl->maSortedVector.end(), aWord, CompareSvxAutocorrWordList());
+ if (it != mpImpl->maSortedVector.end() && !CompareSvxAutocorrWordList()(aWord, *it))
+ {
+ it = mpImpl->maSortedVector.insert(it, std::move(aWord));
+ return &*it;
+ }
+ return nullptr;
+ }
}
void SvxAutocorrWordList::LoadEntry(const OUString& sWrong, const OUString& sRight, bool bOnlyTxt)
{
- std::unique_ptr<SvxAutocorrWord> pNew(new SvxAutocorrWord( sWrong, sRight, bOnlyTxt ));
- (void)Insert(std::move(pNew));
+ (void)Insert(SvxAutocorrWord( sWrong, sRight, bOnlyTxt ));
}
bool SvxAutocorrWordList::empty() const
{
- return mpImpl->maHash.empty() && mpImpl->maSet.empty();
+ return mpImpl->maHash.empty() && mpImpl->maSortedVector.empty();
}
-std::unique_ptr<SvxAutocorrWord> SvxAutocorrWordList::FindAndRemove(SvxAutocorrWord *pWord)
+boost::optional<SvxAutocorrWord> SvxAutocorrWordList::FindAndRemove(SvxAutocorrWord *pWord)
{
- std::unique_ptr<SvxAutocorrWord> pMatch;
- if ( mpImpl->maSet.empty() ) // use the hash
+ if ( mpImpl->maSortedVector.empty() ) // use the hash
{
AutocorrWordHashType::iterator it = mpImpl->maHash.find( pWord->GetShort() );
if( it != mpImpl->maHash.end() )
{
- pMatch = std::move(it->second);
+ SvxAutocorrWord pMatch = std::move(it->second);
mpImpl->maHash.erase (it);
+ return pMatch;
}
}
else
{
- AutocorrWordSetType::iterator it = mpImpl->maSet.find( pWord );
- if( it != mpImpl->maSet.end() )
+ auto it = std::lower_bound(mpImpl->maSortedVector.begin(), mpImpl->maSortedVector.end(), *pWord, CompareSvxAutocorrWordList());
+ if (it != mpImpl->maSortedVector.end() && !CompareSvxAutocorrWordList()(*pWord, *it))
{
- pMatch = std::unique_ptr<SvxAutocorrWord>(*it);
- mpImpl->maSet.erase (it);
+ SvxAutocorrWord pMatch = std::move(*it);
+ mpImpl->maSortedVector.erase (it);
+ return pMatch;
}
}
- return pMatch;
+ return boost::optional<SvxAutocorrWord>();
}
// return the sorted contents - defer sorting until we have to.
-SvxAutocorrWordList::Content SvxAutocorrWordList::getSortedContent() const
+const SvxAutocorrWordList::AutocorrWordSetType& SvxAutocorrWordList::getSortedContent() const
{
- Content aContent;
-
// convert from hash to set permanently
- if ( mpImpl->maSet.empty() )
+ if ( mpImpl->maSortedVector.empty() )
{
- // This beast has some O(N log(N)) in a terribly slow ICU collate fn.
- for (auto & elem : mpImpl->maHash)
- mpImpl->maSet.insert( elem.second.release() );
+ std::vector<SvxAutocorrWord> tmp;
+ tmp.reserve(mpImpl->maHash.size());
+ for (auto & rPair : mpImpl->maHash)
+ tmp.emplace_back(std::move(rPair.second));
mpImpl->maHash.clear();
+ // sort twice - this gets the list into mostly-sorted order, which
+ // reduces the number of times we need to invoke the expensive ICU collate fn.
+ std::sort(tmp.begin(), tmp.end(),
+ [] ( SvxAutocorrWord const & lhs, SvxAutocorrWord const & rhs )
+ {
+ return lhs.GetShort() < rhs.GetShort();
+ });
+ // This beast has some O(N log(N)) in a terribly slow ICU collate fn.
+ // stable_sort is twice as fast as sort in this situation because it does
+ // fewer comparison operations.
+ std::stable_sort(tmp.begin(), tmp.end(), CompareSvxAutocorrWordList());
+ mpImpl->maSortedVector = std::move(tmp);
}
- for (auto const& elem : mpImpl->maSet)
- aContent.push_back(elem);
-
- return aContent;
+ return mpImpl->maSortedVector;
}
const SvxAutocorrWord* SvxAutocorrWordList::WordMatches(const SvxAutocorrWord *pFnd,
@@ -2776,8 +2789,7 @@ const SvxAutocorrWord* SvxAutocorrWordList::WordMatches(const SvxAutocorrWord *p
OUString left_pattern = rTxt.copy(rStt, nEndPos - rStt - rChk.getLength() + left_wildcard);
// avoid double spaces before simple "word" replacement
left_pattern += (left_pattern.getLength() == 0 && pFnd->GetLong()[0] == 0x20) ? pFnd->GetLong().copy(1) : pFnd->GetLong();
- SvxAutocorrWord* pNew = new SvxAutocorrWord(rTxt.copy(rStt, nEndPos - rStt), left_pattern);
- if( Insert( std::unique_ptr<SvxAutocorrWord>(pNew) ) )
+ if( const SvxAutocorrWord* pNew = Insert( SvxAutocorrWord(rTxt.copy(rStt, nEndPos - rStt), left_pattern) ) )
return pNew;
}
} else
@@ -2843,8 +2855,7 @@ const SvxAutocorrWord* SvxAutocorrWordList::WordMatches(const SvxAutocorrWord *p
buf.append(std::u16string_view(rTxt).substr(nFndPos, nEndPos - nFndPos));
aLong = buf.makeStringAndClear();
}
- SvxAutocorrWord* pNew = new SvxAutocorrWord(aShort, aLong);
- if ( Insert( std::unique_ptr<SvxAutocorrWord>(pNew) ) )
+ if ( const SvxAutocorrWord* pNew = Insert( SvxAutocorrWord(aShort, aLong) ) )
{
if ( (rTxt.getLength() > nEndPos && IsWordDelim(rTxt[nEndPos])) || rTxt.getLength() == nEndPos )
return pNew;
@@ -2860,14 +2871,14 @@ const SvxAutocorrWord* SvxAutocorrWordList::SearchWordsInList(const OUString& rT
{
for (auto const& elem : mpImpl->maHash)
{
- if( const SvxAutocorrWord *aTmp = WordMatches( elem.second.get(), rTxt, rStt, nEndPos ) )
- return aTmp;
+ if( const SvxAutocorrWord *pTmp = WordMatches( &elem.second, rTxt, rStt, nEndPos ) )
+ return pTmp;
}
- for (auto const& elem : mpImpl->maSet)
+ for (auto const& elem : mpImpl->maSortedVector)
{
- if( const SvxAutocorrWord *aTmp = WordMatches( elem, rTxt, rStt, nEndPos ) )
- return aTmp;
+ if( const SvxAutocorrWord *pTmp = WordMatches( &elem, rTxt, rStt, nEndPos ) )
+ return pTmp;
}
return nullptr;
}
diff --git a/include/editeng/svxacorr.hxx b/include/editeng/svxacorr.hxx
index 52cae8f9faee..e6477f28dac1 100644
--- a/include/editeng/svxacorr.hxx
+++ b/include/editeng/svxacorr.hxx
@@ -28,6 +28,7 @@
#include <editeng/swafopt.hxx>
#include <editeng/editengdllapi.h>
+#include <boost/optional.hpp>
#include <map>
#include <memory>
@@ -153,13 +154,14 @@ public:
// free any objects still in the set
~SvxAutocorrWordList();
void DeleteAndDestroyAll();
- bool Insert(std::unique_ptr<SvxAutocorrWord> pWord) const;
- std::unique_ptr<SvxAutocorrWord> FindAndRemove(SvxAutocorrWord *pWord);
+ const SvxAutocorrWord* Insert(SvxAutocorrWord aWord) const;
+ boost::optional<SvxAutocorrWord> FindAndRemove(SvxAutocorrWord *pWord);
void LoadEntry(const OUString& sWrong, const OUString& sRight, bool bOnlyTxt);
bool empty() const;
- typedef std::vector<SvxAutocorrWord *> Content;
- Content getSortedContent() const;
+ struct CompareSvxAutocorrWordList;
+ typedef std::vector<SvxAutocorrWord> AutocorrWordSetType;
+ const AutocorrWordSetType & getSortedContent() const;
const SvxAutocorrWord* SearchWordsInList(const OUString& rTxt, sal_Int32& rStt, sal_Int32 nEndPos) const;
};
More information about the Libreoffice-commits
mailing list