[Libreoffice-commits] core.git: Branch 'libreoffice-4-4' - i18npool/source

Michael Stahl mstahl at redhat.com
Thu Mar 19 04:04:21 PDT 2015


 i18npool/source/search/textsearch.cxx |   25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

New commits:
commit afae8d1e9eb37265bd356773caa77d6d8ac481bc
Author: Michael Stahl <mstahl at redhat.com>
Date:   Tue Mar 10 23:19:18 2015 +0100

    i18npool: fix spurious regex ^ matching in TextSearch::searchForward()
    
    Thanks to Eike for finding this:
    
    The anchors ^ and $ now anchor at the selection boundary, because the
    only text the regex matcher gets passed is the selected text. For
    example, given these two paragraphs
    
     aaa bbb aaa bbb
     aaa bbb aaa bbb
    
    with a selection spanning from the second aaa to the third bbb, a
    search for "^aaa" now finds the second aaa, whereas previously it found
    the third aaa at the real paragraph start.
    
    This may not be what the user expects, because the behavior of ^ is
    described as "Match at the beginning of a line" (or paragraph in our
    case), which is what the previous implementation did.
    
    (regression from 806ced87cfe3da72df0d8e4faf5b82535fc7d1b7)
    
    Unfortunately it's not obvious how to implement the same in
    searchBackward().
    
    Change-Id: I07f7a8476b672d9511fa74ca473c32eea427698f
    (cherry picked from commit 9aae521b451269007f03527c83645b8b935eb419)
    Reviewed-on: https://gerrit.libreoffice.org/14829
    Reviewed-by: Caolán McNamara <caolanm at redhat.com>
    Tested-by: Caolán McNamara <caolanm at redhat.com>

diff --git a/i18npool/source/search/textsearch.cxx b/i18npool/source/search/textsearch.cxx
index 959227df..094b554 100644
--- a/i18npool/source/search/textsearch.cxx
+++ b/i18npool/source/search/textsearch.cxx
@@ -242,13 +242,26 @@ SearchResult TextSearch::searchForward( const OUString& searchStr, sal_Int32 sta
         in_str = xTranslit->transliterate( searchStr, startPos, endPos - startPos, offset );
 
         // JP 20.6.2001: also the start and end positions must be corrected!
-        sal_Int32 const newStartPos =
+        sal_Int32 newStartPos =
             (startPos == 0) ? 0 : FindPosInSeq_Impl( offset, startPos );
 
-        sal_Int32 const newEndPos = (endPos < searchStr.getLength())
+        sal_Int32 newEndPos = (endPos < searchStr.getLength())
             ? FindPosInSeq_Impl( offset, endPos )
             : in_str.getLength();
 
+        sal_Int32 nExtraOffset = 0;
+        if (pRegexMatcher && startPos > 0)
+        {
+            // avoid matching ^ here - in_str omits a prefix of the searchStr
+            // this is a really lame way to do it, but ICU only offers
+            // useAnchoringBounds() to disable *both* bounds but what is needed
+            // here is to disable only one bound and respect the other
+            in_str = "X" + in_str;
+            nExtraOffset = 1;
+            newStartPos += nExtraOffset;
+            newEndPos += nExtraOffset;
+        }
+
         sres = (this->*fnForward)( in_str, newStartPos, newEndPos );
 
         // Map offsets back to untransliterated string.
@@ -260,14 +273,14 @@ SearchResult TextSearch::searchForward( const OUString& searchStr, sal_Int32 sta
             const sal_Int32 nGroups = sres.startOffset.getLength();
             for ( sal_Int32 k = 0; k < nGroups; k++ )
             {
-                const sal_Int32 nStart = sres.startOffset[k];
+                const sal_Int32 nStart = sres.startOffset[k] - nExtraOffset;
                 if (startPos > 0 || nStart > 0)
                     sres.startOffset[k] = (nStart < nOffsets ? offset[nStart] : (offset[nOffsets - 1] + 1));
                 // JP 20.6.2001: end is ever exclusive and then don't return
                 //               the position of the next character - return the
                 //               next position behind the last found character!
                 //               "a b c" find "b" must return 2,3 and not 2,4!!!
-                const sal_Int32 nStop = sres.endOffset[k];
+                const sal_Int32 nStop = sres.endOffset[k] - nExtraOffset;
                 if (startPos > 0 || nStop > 0)
                     sres.endOffset[k] = offset[(nStop <= nOffsets ? nStop : nOffsets) - 1] + 1;
             }
@@ -345,6 +358,10 @@ SearchResult TextSearch::searchBackward( const OUString& searchStr, sal_Int32 st
         sal_Int32 const newEndPos =
             (endPos == 0) ? 0 : FindPosInSeq_Impl( offset, endPos );
 
+        // TODO: this would need nExtraOffset handling to avoid $ matching
+        // if (pRegexMatcher && startPos < searchStr.getLength())
+        // but that appears to be impossible with ICU regex
+
         sres = (this->*fnBackward)( in_str, newStartPos, newEndPos );
 
         // Map offsets back to untransliterated string.


More information about the Libreoffice-commits mailing list