[Libreoffice-commits] .: i18npool/inc i18npool/source

Libreoffice Gerrit user logerrit at kemper.freedesktop.org
Fri Dec 21 03:39:19 PST 2012


 i18npool/inc/breakiterator_unicode.hxx                  |    7 -
 i18npool/source/breakiterator/breakiterator_unicode.cxx |   80 +++++++---------
 2 files changed, 41 insertions(+), 46 deletions(-)

New commits:
commit 9c6006b961f690728f4035c10f8b9fe9fdb6f332
Author: Michael Meeks <michael.meeks at suse.com>
Date:   Thu Dec 20 23:04:15 2012 +0000

    fdo#58590 - cleanup and accelerate break-iterators.
    
    Doing word-count by switching per-word between two different
    kinds of word iterator was insanely slow. This preserves an
    ICU break-iterator for each type of word-breaking.

diff --git a/i18npool/inc/breakiterator_unicode.hxx b/i18npool/inc/breakiterator_unicode.hxx
index 26046ea..fe226d4 100644
--- a/i18npool/inc/breakiterator_unicode.hxx
+++ b/i18npool/inc/breakiterator_unicode.hxx
@@ -80,6 +80,7 @@ protected:
         rtl::OUString aICUText;
         UText *ut;
         icu::BreakIterator *aBreakIterator;
+        com::sun::star::lang::Locale maLocale;
 
         BI_Data()
             : ut(NULL)
@@ -91,10 +92,10 @@ protected:
             utext_close(ut);
         }
 
-    } character, word, sentence, line, *icuBI;
+    } character, sentence, line, *icuBI;
+    BI_Data words[4]; // 4 is css::i18n::WordType enumeration size
 
-    com::sun::star::lang::Locale aLocale;
-    sal_Int16 aBreakType, aWordType;
+    sal_Int16 aBreakType;
 
     void SAL_CALL loadICUBreakIterator(const com::sun::star::lang::Locale& rLocale,
         sal_Int16 rBreakType, sal_Int16 rWordType, const sal_Char* name, const rtl::OUString& rText) throw(com::sun::star::uno::RuntimeException);
diff --git a/i18npool/source/breakiterator/breakiterator_unicode.cxx b/i18npool/source/breakiterator/breakiterator_unicode.cxx
index 242cfa6..77ca831 100644
--- a/i18npool/source/breakiterator/breakiterator_unicode.cxx
+++ b/i18npool/source/breakiterator/breakiterator_unicode.cxx
@@ -44,29 +44,17 @@ BreakIterator_Unicode::BreakIterator_Unicode() :
     cBreakIterator( "com.sun.star.i18n.BreakIterator_Unicode" ),    // implementation name
     wordRule( "word" ),
     lineRule( "line" ),
-    result(),
-    character(),
-    word(),
-    sentence(),
-    line(),
-    icuBI( NULL ),
-    aLocale(),
-    aBreakType(),
-    aWordType()
+    icuBI( NULL )
 {
 }
 
-
 BreakIterator_Unicode::~BreakIterator_Unicode()
 {
-        if (icuBI && icuBI->aBreakIterator) {
-            delete icuBI->aBreakIterator;
-            icuBI->aBreakIterator=NULL;
-        }
-        if (character.aBreakIterator) delete character.aBreakIterator;
-        if (word.aBreakIterator) delete word.aBreakIterator;
-        if (sentence.aBreakIterator) delete sentence.aBreakIterator;
-        if (line.aBreakIterator) delete line.aBreakIterator;
+    delete character.aBreakIterator;
+    delete sentence.aBreakIterator;
+    delete line.aBreakIterator;
+    for (size_t i = 0; i < SAL_N_ELEMENTS(words); i++)
+        delete words[i].aBreakIterator;
 }
 
 /*
@@ -86,26 +74,34 @@ class OOoRuleBasedBreakIterator : public RuleBasedBreakIterator {
 
 // loading ICU breakiterator on demand.
 void SAL_CALL BreakIterator_Unicode::loadICUBreakIterator(const com::sun::star::lang::Locale& rLocale,
-        sal_Int16 rBreakType, sal_Int16 rWordType, const sal_Char *rule, const OUString& rText) throw(uno::RuntimeException)
+        sal_Int16 rBreakType, sal_Int16 nWordType, const sal_Char *rule, const OUString& rText) throw(uno::RuntimeException)
 {
     sal_Bool newBreak = sal_False;
     UErrorCode status = U_ZERO_ERROR;
     sal_Int16 breakType = 0;
     switch (rBreakType) {
         case LOAD_CHARACTER_BREAKITERATOR: icuBI=&character; breakType = 3; break;
-        case LOAD_WORD_BREAKITERATOR: icuBI=&word;
-            switch (rWordType) {
-                case WordType::ANYWORD_IGNOREWHITESPACES: breakType = 0; rule=wordRule = "edit_word"; break;
-                case WordType::DICTIONARY_WORD: breakType = 1; rule=wordRule = "dict_word"; break;
-                case WordType::WORD_COUNT: breakType = 2; rule=wordRule = "count_word"; break;
+        case LOAD_WORD_BREAKITERATOR:
+            assert (nWordType >= 0 && nWordType<= WordType::WORD_COUNT);
+            icuBI=&words[nWordType];
+            switch (nWordType) {
+                case WordType::ANY_WORD: break; // odd but previous behavior
+                case WordType::ANYWORD_IGNOREWHITESPACES:
+                    breakType = 0; rule = wordRule = "edit_word"; break;
+                case WordType::DICTIONARY_WORD:
+                    breakType = 1; rule = wordRule = "dict_word"; break;
+                default:
+                case WordType::WORD_COUNT:
+                    breakType = 2; rule = wordRule = "count_word"; break;
             }
             break;
         case LOAD_SENTENCE_BREAKITERATOR: icuBI=&sentence; breakType = 5; break;
         case LOAD_LINE_BREAKITERATOR: icuBI=&line; breakType = 4; break;
     }
-    if (!icuBI->aBreakIterator || rWordType != aWordType ||
-            rLocale.Language != aLocale.Language || rLocale.Country != aLocale.Country ||
-            rLocale.Variant != aLocale.Variant) {
+    if (!icuBI->aBreakIterator ||
+        rLocale.Language != icuBI->maLocale.Language ||
+        rLocale.Country  != icuBI->maLocale.Country  ||
+        rLocale.Variant  != icuBI->maLocale.Variant) {
         if (icuBI->aBreakIterator) {
             delete icuBI->aBreakIterator;
             icuBI->aBreakIterator=NULL;
@@ -180,9 +176,7 @@ void SAL_CALL BreakIterator_Unicode::loadICUBreakIterator(const com::sun::star::
             }
         }
         if (icuBI->aBreakIterator) {
-            aLocale=rLocale;
-            aWordType=rWordType;
-            aBreakType=rBreakType;
+            icuBI->maLocale=rLocale;
             newBreak=sal_True;
         } else {
             throw ERROR;
@@ -252,16 +246,16 @@ Boundary SAL_CALL BreakIterator_Unicode::nextWord( const OUString& Text, sal_Int
 {
         loadICUBreakIterator(rLocale, LOAD_WORD_BREAKITERATOR, rWordType, NULL, Text);
 
-        result.startPos = word.aBreakIterator->following(nStartPos);
+        result.startPos = icuBI->aBreakIterator->following(nStartPos);
         if( result.startPos >= Text.getLength() || result.startPos == BreakIterator::DONE )
             result.endPos = result.startPos;
         else {
             if ( (rWordType == WordType::ANYWORD_IGNOREWHITESPACES ||
                     rWordType == WordType::DICTIONARY_WORD ) &&
                         u_isWhitespace(Text.iterateCodePoints(&result.startPos, 0)) )
-                result.startPos = word.aBreakIterator->following(result.startPos);
+                result.startPos = icuBI->aBreakIterator->following(result.startPos);
 
-            result.endPos = word.aBreakIterator->following(result.startPos);
+            result.endPos = icuBI->aBreakIterator->following(result.startPos);
             if(result.endPos == BreakIterator::DONE)
                 result.endPos = result.startPos;
         }
@@ -274,16 +268,16 @@ Boundary SAL_CALL BreakIterator_Unicode::previousWord(const OUString& Text, sal_
 {
         loadICUBreakIterator(rLocale, LOAD_WORD_BREAKITERATOR, rWordType, NULL, Text);
 
-        result.startPos = word.aBreakIterator->preceding(nStartPos);
+        result.startPos = icuBI->aBreakIterator->preceding(nStartPos);
         if( result.startPos < 0 || result.startPos == BreakIterator::DONE)
             result.endPos = result.startPos;
         else {
             if ( (rWordType == WordType::ANYWORD_IGNOREWHITESPACES ||
                     rWordType == WordType::DICTIONARY_WORD) &&
                         u_isWhitespace(Text.iterateCodePoints(&result.startPos, 0)) )
-                result.startPos = word.aBreakIterator->preceding(result.startPos);
+                result.startPos = icuBI->aBreakIterator->preceding(result.startPos);
 
-            result.endPos = word.aBreakIterator->following(result.startPos);
+            result.endPos = icuBI->aBreakIterator->following(result.startPos);
             if(result.endPos == BreakIterator::DONE)
                 result.endPos = result.startPos;
         }
@@ -297,22 +291,22 @@ Boundary SAL_CALL BreakIterator_Unicode::getWordBoundary( const OUString& Text,
         loadICUBreakIterator(rLocale, LOAD_WORD_BREAKITERATOR, rWordType, NULL, Text);
         sal_Int32 len = Text.getLength();
 
-        if(word.aBreakIterator->isBoundary(nPos)) {
+        if(icuBI->aBreakIterator->isBoundary(nPos)) {
             result.startPos = result.endPos = nPos;
             if((bDirection || nPos == 0) && nPos < len) //forward
-                result.endPos = word.aBreakIterator->following(nPos);
+                result.endPos = icuBI->aBreakIterator->following(nPos);
             else
-                result.startPos = word.aBreakIterator->preceding(nPos);
+                result.startPos = icuBI->aBreakIterator->preceding(nPos);
         } else {
             if(nPos <= 0) {
                 result.startPos = 0;
-                result.endPos = len ? word.aBreakIterator->following((sal_Int32)0) : 0;
+                result.endPos = len ? icuBI->aBreakIterator->following((sal_Int32)0) : 0;
             } else if(nPos >= len) {
-                result.startPos = word.aBreakIterator->preceding(len);
+                result.startPos = icuBI->aBreakIterator->preceding(len);
                 result.endPos = len;
             } else {
-                result.startPos = word.aBreakIterator->preceding(nPos);
-                result.endPos = word.aBreakIterator->following(nPos);
+                result.startPos = icuBI->aBreakIterator->preceding(nPos);
+                result.endPos = icuBI->aBreakIterator->following(nPos);
             }
         }
         if (result.startPos == BreakIterator::DONE)


More information about the Libreoffice-commits mailing list