[Libreoffice-commits] core.git: i18npool/source

Herbert Dürr hdu at apache.org
Thu Mar 7 10:44:51 PST 2013


 i18npool/source/search/textsearch.cxx |   21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

New commits:
commit 8c26876fea085a1bc847abba63dffa97a9499c1d
Author: Herbert Dürr <hdu at apache.org>
Date:   Fri Sep 7 12:27:55 2012 +0000

    i#120598 better emulation of regexp word-start and word-end operators
    
    The emulation of the word-start and word-end operators provided
    by the previous regexp engine can be approximated much better
    by using the ICU-regexp engine's powerful look-around feature.
    
    Patch-by: Herbert Duerr
    Found-by: ldgolds33 at yahoo.com
    (cherry picked from commit ec7ef30693f10315ce80a8f5d7325a0e40855e66)
    
    Change-Id: If375d6d5bb93b3873f657673f7581f0884b3b35e

diff --git a/i18npool/source/search/textsearch.cxx b/i18npool/source/search/textsearch.cxx
index 075cd21..072c0be 100644
--- a/i18npool/source/search/textsearch.cxx
+++ b/i18npool/source/search/textsearch.cxx
@@ -681,13 +681,20 @@ void TextSearch::RESrchPrepare( const ::com::sun::star::util::SearchOptions& rOp
     IcuUniString aIcuSearchPatStr( (const UChar*)rPatternStr.getStr(), rPatternStr.getLength());
 #ifndef DISABLE_WORDBOUND_EMULATION
     // for conveniance specific syntax elements of the old regex engine are emulated
-    // by using regular word boundary matching \b to replace \< and \>
-    static const IcuUniString aChevronPattern( "\\\\<|\\\\>", -1, IcuUniString::kInvariant);
-    static const IcuUniString aChevronReplace( "\\\\b", -1, IcuUniString::kInvariant);
-    static RegexMatcher aChevronMatcher( aChevronPattern, 0, nIcuErr);
-    aChevronMatcher.reset( aIcuSearchPatStr);
-    aIcuSearchPatStr = aChevronMatcher.replaceAll( aChevronReplace, nIcuErr);
-    aChevronMatcher.reset();
+    // - by replacing \< with "word-break followed by a look-ahead word-char"
+    static const IcuUniString aChevronPatternB( "\\\\<", -1, IcuUniString::kInvariant);
+    static const IcuUniString aChevronReplaceB( "\\\\b(?=\\\\w)", -1, IcuUniString::kInvariant);
+    static RegexMatcher aChevronMatcherB( aChevronPatternB, 0, nIcuErr);
+    aChevronMatcherB.reset( aIcuSearchPatStr);
+    aIcuSearchPatStr = aChevronMatcherB.replaceAll( aChevronReplaceB, nIcuErr);
+    aChevronMatcherB.reset();
+    // - by replacing \> with "look-behind word-char followed by a word-break"
+    static const IcuUniString aChevronPatternE( "\\\\>", -1, IcuUniString::kInvariant);
+    static const IcuUniString aChevronReplaceE( "(?<=\\\\w)\\\\b", -1, IcuUniString::kInvariant);
+    static RegexMatcher aChevronMatcherE( aChevronPatternE, 0, nIcuErr);
+    aChevronMatcherE.reset( aIcuSearchPatStr);
+    aIcuSearchPatStr = aChevronMatcherE.replaceAll( aChevronReplaceE, nIcuErr);
+    aChevronMatcherE.reset();
 #endif
     pRegexMatcher = new RegexMatcher( aIcuSearchPatStr, nIcuSearchFlags, nIcuErr);
     if( nIcuErr)


More information about the Libreoffice-commits mailing list