[Libreoffice-commits] .: 4 commits - i18npool/source sw/inc sw/source
Caolán McNamara
caolan at kemper.freedesktop.org
Tue May 1 08:27:42 PDT 2012
i18npool/source/breakiterator/breakiterator_unicode.cxx | 27 ++++++++++++++--
sw/inc/breakit.hxx | 7 +++-
sw/source/core/bastyp/breakit.cxx | 19 +++++++----
sw/source/core/txtnode/txtedt.cxx | 2 -
4 files changed, 44 insertions(+), 11 deletions(-)
New commits:
commit ae716b07f7218fadf0143de1946cc9e0e2c08744
Author: Caolán McNamara <caolanm at redhat.com>
Date: Tue May 1 16:26:58 2012 +0100
Related: fdo#49208 optimize common case
Change-Id: Ieec379b08cb9096b1c8187c2eda5053f093c612d
diff --git a/sw/source/core/bastyp/breakit.cxx b/sw/source/core/bastyp/breakit.cxx
index 1fbadd8..4c84379 100644
--- a/sw/source/core/bastyp/breakit.cxx
+++ b/sw/source/core/bastyp/breakit.cxx
@@ -172,9 +172,16 @@ sal_Int32 SwBreakIt::getGraphemeCount(const rtl::OUString& rText, sal_Int32 nSta
sal_Int32 nCurPos = nStart;
while (nCurPos < nEnd)
{
- sal_Int32 nCount2 = 1;
- nCurPos = xBreak->nextCharacters(rText, nCurPos, lang::Locale(),
- i18n::CharacterIteratorMode::SKIPCELL, nCount2, nCount2);
+ //fdo#49208 cheat and assume that nothing can combine with a space
+ //to form a single grapheme
+ if (rText[nCurPos] == ' ')
+ ++nCurPos;
+ else
+ {
+ sal_Int32 nCount2 = 1;
+ nCurPos = xBreak->nextCharacters(rText, nCurPos, lang::Locale(),
+ i18n::CharacterIteratorMode::SKIPCELL, nCount2, nCount2);
+ }
++nGraphemeCount;
}
commit ccc47b3db3eae25cc11bb709416c0b61747ca89e
Author: Caolán McNamara <caolanm at redhat.com>
Date: Tue May 1 16:09:25 2012 +0100
Resolves: fdo#49208 icu string compare is shocking slow
Change-Id: Iee3ab0ebbbb72e88e33dcbe0fcb4df1e4f60c301
diff --git a/i18npool/source/breakiterator/breakiterator_unicode.cxx b/i18npool/source/breakiterator/breakiterator_unicode.cxx
index 08a5e6c..4005780 100644
--- a/i18npool/source/breakiterator/breakiterator_unicode.cxx
+++ b/i18npool/source/breakiterator/breakiterator_unicode.cxx
@@ -34,6 +34,7 @@
#include <unicode/udata.h>
#include <rtl/strbuf.hxx>
#include <rtl/ustring.hxx>
+#include <string.h>
U_CDECL_BEGIN
extern const char OpenOffice_dat[];
@@ -94,6 +95,24 @@ class OOoRuleBasedBreakIterator : public RuleBasedBreakIterator {
};
+namespace
+{
+ bool isEqual(const UnicodeString &rOne, const rtl::OUString &rOther)
+ {
+ sal_Int32 nLength = rOne.length();
+ if (nLength != rOther.getLength())
+ return false;
+
+ //fdo#49208 operator== is implemented by compareTo etc in icu which is
+ //horrifically slow when all you want to know is that they're the same
+ //or not
+ const UChar *pOne = rOne.getBuffer();
+ // UChar != sal_Unicode in MinGW
+ const UChar *pOther = reinterpret_cast<const UChar *>(rOther.getStr());
+ return memcmp(pOne, pOther, nLength * sizeof(UChar)) == 0;
+ }
+}
+
// loading ICU breakiterator on demand.
void SAL_CALL BreakIterator_Unicode::loadICUBreakIterator(const com::sun::star::lang::Locale& rLocale,
sal_Int16 rBreakType, sal_Int16 rWordType, const sal_Char *rule, const OUString& rText) throw(uno::RuntimeException)
@@ -199,10 +218,10 @@ void SAL_CALL BreakIterator_Unicode::loadICUBreakIterator(const com::sun::star::
}
}
- // UChar != sal_Unicode in MinGW
- const UChar *pText = reinterpret_cast<const UChar *>(rText.getStr());
- if (newBreak || icuBI->aICUText.compare(pText, rText.getLength()))
+ if (newBreak || !isEqual(icuBI->aICUText, rText))
{
+ // UChar != sal_Unicode in MinGW
+ const UChar *pText = reinterpret_cast<const UChar *>(rText.getStr());
icuBI->aICUText=UnicodeString(pText, rText.getLength());
icuBI->aBreakIterator->setText(icuBI->aICUText);
}
commit fd4fe85329654883a0bf3304ad0aa8ef0bfde844
Author: Caolán McNamara <caolanm at redhat.com>
Date: Tue May 1 16:06:25 2012 +0100
Related: fdo#49208 don't copy string if we can reuse the original
Change-Id: I95d82ce168fd1790107316460f6ddbd9f6b32e18
diff --git a/sw/inc/breakit.hxx b/sw/inc/breakit.hxx
index 5fa2bd5..031d91c 100644
--- a/sw/inc/breakit.hxx
+++ b/sw/inc/breakit.hxx
@@ -96,7 +96,12 @@ public:
sal_uInt16 GetRealScriptOfText( const rtl::OUString& rTxt, sal_Int32 nPos ) const;
sal_uInt16 GetAllScriptsOfText( const rtl::OUString& rTxt ) const;
- sal_Int32 getGraphemeCount(const rtl::OUString& rStr) const;
+ sal_Int32 getGraphemeCount(const rtl::OUString& rStr,
+ sal_Int32 nStart, sal_Int32 nEnd) const;
+ sal_Int32 getGraphemeCount(const rtl::OUString& rStr) const
+ {
+ return getGraphemeCount(rStr, 0, rStr.getLength());
+ }
};
#define SW_BREAKITER() SwBreakIt::Get()
diff --git a/sw/source/core/bastyp/breakit.cxx b/sw/source/core/bastyp/breakit.cxx
index 313fdca..1fbadd8 100644
--- a/sw/source/core/bastyp/breakit.cxx
+++ b/sw/source/core/bastyp/breakit.cxx
@@ -165,12 +165,12 @@ sal_uInt16 SwBreakIt::GetAllScriptsOfText( const rtl::OUString& rTxt ) const
return nRet;
}
-sal_Int32 SwBreakIt::getGraphemeCount(const rtl::OUString& rText) const
+sal_Int32 SwBreakIt::getGraphemeCount(const rtl::OUString& rText, sal_Int32 nStart, sal_Int32 nEnd) const
{
sal_Int32 nGraphemeCount = 0;
- sal_Int32 nCurPos = 0;
- while (nCurPos < rText.getLength())
+ sal_Int32 nCurPos = nStart;
+ while (nCurPos < nEnd)
{
sal_Int32 nCount2 = 1;
nCurPos = xBreak->nextCharacters(rText, nCurPos, lang::Locale(),
diff --git a/sw/source/core/txtnode/txtedt.cxx b/sw/source/core/txtnode/txtedt.cxx
index cb5e472..ae8fe55 100644
--- a/sw/source/core/txtnode/txtedt.cxx
+++ b/sw/source/core/txtnode/txtedt.cxx
@@ -1913,7 +1913,7 @@ void SwTxtNode::CountWords( SwDocStat& rStat,
}
}
- nTmpChars = pBreakIt->getGraphemeCount(aExpandText.copy(nExpandBegin, nExpandEnd - nExpandBegin));
+ nTmpChars = pBreakIt->getGraphemeCount(aExpandText, nExpandBegin, nExpandEnd);
nTmpChars -= nNumOfMaskedChars;
// no nTmpCharsExcludingSpaces adjust needed neither for blanked out MaskedChars
commit 268ec2e64f89eb39fd5f02688787cd6f53e948b5
Author: Caolán McNamara <caolanm at redhat.com>
Date: Tue May 1 15:08:29 2012 +0100
Related: fdo#49208 crazy to create the string *twice*
Change-Id: Ib31919672d0754fa4f650dcb32dc2c59a410b54c
diff --git a/i18npool/source/breakiterator/breakiterator_unicode.cxx b/i18npool/source/breakiterator/breakiterator_unicode.cxx
index 0d6df0f..08a5e6c 100644
--- a/i18npool/source/breakiterator/breakiterator_unicode.cxx
+++ b/i18npool/source/breakiterator/breakiterator_unicode.cxx
@@ -199,13 +199,15 @@ void SAL_CALL BreakIterator_Unicode::loadICUBreakIterator(const com::sun::star::
}
}
- if (newBreak || icuBI->aICUText.compare(UnicodeString(reinterpret_cast<const UChar *>(rText.getStr()), rText.getLength()))) { // UChar != sal_Unicode in MinGW
- icuBI->aICUText=UnicodeString(reinterpret_cast<const UChar *>(rText.getStr()), rText.getLength());
+ // UChar != sal_Unicode in MinGW
+ const UChar *pText = reinterpret_cast<const UChar *>(rText.getStr());
+ if (newBreak || icuBI->aICUText.compare(pText, rText.getLength()))
+ {
+ icuBI->aICUText=UnicodeString(pText, rText.getLength());
icuBI->aBreakIterator->setText(icuBI->aICUText);
}
}
-
sal_Int32 SAL_CALL BreakIterator_Unicode::nextCharacters( const OUString& Text,
sal_Int32 nStartPos, const lang::Locale &rLocale,
sal_Int16 nCharacterIteratorMode, sal_Int32 nCount, sal_Int32& nDone )
More information about the Libreoffice-commits
mailing list