[Libreoffice-commits] core.git: 2 commits - i18npool/qa i18npool/source

Eike Rathke erack at redhat.com
Wed Feb 17 20:17:58 UTC 2016


 i18npool/qa/cppunit/test_textsearch.cxx |  121 +++++++++
 i18npool/source/search/textsearch.cxx   |  401 +++++++++++++++++++++++++++++++-
 i18npool/source/search/textsearch.hxx   |   14 +
 3 files changed, 533 insertions(+), 3 deletions(-)

New commits:
commit 8fd8a8875def353db34c68ee74d40772b83bae4c
Author: Eike Rathke <erack at redhat.com>
Date:   Wed Feb 17 21:13:50 2016 +0100

    wildcard search unit tests, tdf#72196
    
    Change-Id: Ifa23592f5e72438267926acf89c113a69a7f0ca0

diff --git a/i18npool/qa/cppunit/test_textsearch.cxx b/i18npool/qa/cppunit/test_textsearch.cxx
index 50bab9a..1cb14f6 100644
--- a/i18npool/qa/cppunit/test_textsearch.cxx
+++ b/i18npool/qa/cppunit/test_textsearch.cxx
@@ -21,8 +21,8 @@
 #include <cppuhelper/basemutex.hxx>
 #include <com/sun/star/util/SearchFlags.hpp>
 #include <com/sun/star/util/SearchOptions.hpp>
-#include <com/sun/star/util/SearchAlgorithms.hpp>
-#include <com/sun/star/util/XTextSearch.hpp>
+#include <com/sun/star/util/SearchAlgorithms2.hpp>
+#include <com/sun/star/util/XTextSearch2.hpp>
 #include <com/sun/star/i18n/Transliteration.hpp>
 #include <unotest/bootstrapfixturebase.hxx>
 
@@ -43,13 +43,16 @@ public:
 
     void testICU();
     void testSearches();
+    void testWildcardSearch();
 
     CPPUNIT_TEST_SUITE(TestTextSearch);
     CPPUNIT_TEST(testICU);
     CPPUNIT_TEST(testSearches);
+    CPPUNIT_TEST(testWildcardSearch);
     CPPUNIT_TEST_SUITE_END();
 private:
     uno::Reference<util::XTextSearch> m_xSearch;
+    uno::Reference<util::XTextSearch2> m_xSearch2;
 };
 
 // Sanity check our ICU first ...
@@ -135,15 +138,129 @@ void TestTextSearch::testSearches()
     CPPUNIT_ASSERT((aRes.startOffset[2] == 3) && (aRes.endOffset[2] == 5));
 }
 
+void TestTextSearch::testWildcardSearch()
+{
+    util::SearchOptions2 aOptions;
+    OUString aText;
+    util::SearchResult aRes;
+
+    aOptions.AlgorithmType2 = util::SearchAlgorithms2::WILDCARD ;
+    aOptions.transliterateFlags = ::css::i18n::TransliterationModules::TransliterationModules_IGNORE_CASE;
+    aText = "abAca";
+
+    aOptions.searchString = "a";
+    m_xSearch2->setOptions2( aOptions );
+    // match first "a", [0,1)
+    aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
+    CPPUNIT_ASSERT(aRes.subRegExpressions == 1);
+    CPPUNIT_ASSERT((aRes.startOffset[0] == 0) && (aRes.endOffset[0] == 1));
+    // match last "a", (5,4]
+    aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
+    CPPUNIT_ASSERT(aRes.subRegExpressions == 1);
+    CPPUNIT_ASSERT((aRes.startOffset[0] == 5) && (aRes.endOffset[0] == 4));
+
+    aOptions.searchString = "a?";
+    m_xSearch2->setOptions2( aOptions );
+    // match "ab", [0,2)
+    aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
+    CPPUNIT_ASSERT(aRes.subRegExpressions == 1);
+    CPPUNIT_ASSERT((aRes.startOffset[0] == 0) && (aRes.endOffset[0] == 2));
+    // match "ac", (4,2]
+    aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
+    CPPUNIT_ASSERT(aRes.subRegExpressions == 1);
+    CPPUNIT_ASSERT((aRes.startOffset[0] == 4) && (aRes.endOffset[0] == 2));
+
+    aOptions.searchString = "a*c";
+    m_xSearch2->setOptions2( aOptions );
+    // match "abac", [0,4) XXX NOTE: first match forward
+    aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
+    CPPUNIT_ASSERT(aRes.subRegExpressions == 1);
+    CPPUNIT_ASSERT((aRes.startOffset[0] == 0) && (aRes.endOffset[0] == 4));
+    // match "ac", (4,2] XXX NOTE: first match backward, not greedy
+    aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
+    CPPUNIT_ASSERT(aRes.subRegExpressions == 1);
+    CPPUNIT_ASSERT((aRes.startOffset[0] == 4) && (aRes.endOffset[0] == 2));
+
+    aOptions.searchString = "b*a";
+    m_xSearch2->setOptions2( aOptions );
+    // match "ba", [1,3) XXX NOTE: first match forward, not greedy
+    aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
+    CPPUNIT_ASSERT(aRes.subRegExpressions == 1);
+    CPPUNIT_ASSERT((aRes.startOffset[0] == 1) && (aRes.endOffset[0] == 3));
+    // match "baca", (5,1] XXX NOTE: first match backward
+    aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
+    CPPUNIT_ASSERT(aRes.subRegExpressions == 1);
+    CPPUNIT_ASSERT((aRes.startOffset[0] == 5) && (aRes.endOffset[0] == 1));
+
+    aText = "ab?ca";
+
+    aOptions.searchString = "?~??";
+    m_xSearch2->setOptions2( aOptions );
+    // match "b?c", [1,4)
+    aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
+    CPPUNIT_ASSERT(aRes.subRegExpressions == 1);
+    CPPUNIT_ASSERT((aRes.startOffset[0] == 1) && (aRes.endOffset[0] == 4));
+    // match "b?c", (4,1]
+    aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
+    CPPUNIT_ASSERT(aRes.subRegExpressions == 1);
+    CPPUNIT_ASSERT((aRes.startOffset[0] == 4) && (aRes.endOffset[0] == 1));
+
+    aText = "ab*ca";
+
+    aOptions.searchString = "?~*?";
+    m_xSearch2->setOptions2( aOptions );
+    // match "b?c", [1,4)
+    aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
+    CPPUNIT_ASSERT(aRes.subRegExpressions == 1);
+    CPPUNIT_ASSERT((aRes.startOffset[0] == 1) && (aRes.endOffset[0] == 4));
+    // match "b?c", (4,1]
+    aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
+    CPPUNIT_ASSERT(aRes.subRegExpressions == 1);
+    CPPUNIT_ASSERT((aRes.startOffset[0] == 4) && (aRes.endOffset[0] == 1));
+
+    aOptions.searchString = "ca?";
+    m_xSearch2->setOptions2( aOptions );
+    // no match
+    aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
+    CPPUNIT_ASSERT(aRes.subRegExpressions == 0);
+    // no match
+    aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
+    CPPUNIT_ASSERT(aRes.subRegExpressions == 0);
+
+    aOptions.searchString = "ca*";
+    m_xSearch2->setOptions2( aOptions );
+    // match "ca", [3,5)
+    aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
+    CPPUNIT_ASSERT(aRes.subRegExpressions == 1);
+    CPPUNIT_ASSERT((aRes.startOffset[0] == 3) && (aRes.endOffset[0] == 5));
+    // match "ca", (5,3]
+    aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
+    CPPUNIT_ASSERT(aRes.subRegExpressions == 1);
+    CPPUNIT_ASSERT((aRes.startOffset[0] == 5) && (aRes.endOffset[0] == 3));
+
+    aOptions.searchString = "*ca*";
+    m_xSearch2->setOptions2( aOptions );
+    // match "abaca", [0,5)
+    aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
+    CPPUNIT_ASSERT(aRes.subRegExpressions == 1);
+    CPPUNIT_ASSERT((aRes.startOffset[0] == 0) && (aRes.endOffset[0] == 5));
+    // match "abaca", (5,0]
+    aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
+    CPPUNIT_ASSERT(aRes.subRegExpressions == 1);
+    CPPUNIT_ASSERT((aRes.startOffset[0] == 5) && (aRes.endOffset[0] == 0));
+}
+
 void TestTextSearch::setUp()
 {
     BootstrapFixtureBase::setUp();
     m_xSearch.set(m_xSFactory->createInstance("com.sun.star.util.TextSearch"), uno::UNO_QUERY_THROW);
+    m_xSearch2.set(m_xSFactory->createInstance("com.sun.star.util.TextSearch2"), uno::UNO_QUERY_THROW);
 }
 
 void TestTextSearch::tearDown()
 {
     m_xSearch.clear();
+    m_xSearch2.clear();
     BootstrapFixtureBase::tearDown();
 }
 
commit f1a4663c819bf698f95a75b5a3319506c66f2778
Author: Eike Rathke <erack at redhat.com>
Date:   Wed Feb 17 19:26:51 2016 +0100

    implement '*' '?' '~' wildcard search, tdf#72196
    
    Change-Id: Id6122a13423c37e91c9f7561e4d8e5c658d5530e

diff --git a/i18npool/source/search/textsearch.cxx b/i18npool/source/search/textsearch.cxx
index 906fc7b..90bced1 100644
--- a/i18npool/source/search/textsearch.cxx
+++ b/i18npool/source/search/textsearch.cxx
@@ -36,6 +36,7 @@
 #include <cppuhelper/factory.hxx>
 #include <cppuhelper/supportsservice.hxx>
 #include <cppuhelper/weak.hxx>
+#include <rtl/ustrbuf.hxx>
 #include <sal/log.hxx>
 
 #ifdef _MSC_VER
@@ -112,6 +113,8 @@ TextSearch::TextSearch(const Reference < XComponentContext > & rxContext)
         , pJumpTable2( nullptr )
         , pRegexMatcher( nullptr )
         , pWLD( nullptr )
+        , mcWildcardEscapeChar('~')         /* TODO: make this option available through API */
+        , mbWildcardAllowSubstring(true)    /* TODO: make this option available through API */
 {
     SearchOptions2 aOpt;
     aOpt.AlgorithmType2 = SearchAlgorithms2::ABSOLUTE;
@@ -137,6 +140,8 @@ void TextSearch::setOptions2( const SearchOptions2& rOptions ) throw( RuntimeExc
     delete pWLD, pWLD = nullptr;
     delete pJumpTable, pJumpTable = nullptr;
     delete pJumpTable2, pJumpTable2 = nullptr;
+    maWildcardReversePattern.clear();
+    maWildcardReversePattern2.clear();
 
     // Create Transliteration class
     if( isSimpleTrans( aSrchPara.transliterateFlags) )
@@ -235,8 +240,12 @@ void TextSearch::setOptions2( const SearchOptions2& rOptions ) throw( RuntimeExc
             nLimit = pWLD->GetLimit();
             break;
 
+        case SearchAlgorithms2::WILDCARD:
+            fnForward = &TextSearch::WildcardSrchFrwrd;
+            fnBackward = &TextSearch::WildcardSrchBkwrd;
+            break;
+
         default:
-        case SearchAlgorithms2::WILDCARD:   /* FIXME: temporary */
             SAL_WARN("i18npool","TextSearch::setOptions2 - default what?");
             // fallthru
         case SearchAlgorithms2::ABSOLUTE:
@@ -1118,6 +1127,396 @@ SearchResult TextSearch::ApproxSrchBkwrd( const OUString& searchStr,
 }
 
 
+namespace {
+void setWildcardMatch( css::util::SearchResult& rRes, sal_Int32 nStartOffset, sal_Int32 nEndOffset )
+{
+    rRes.subRegExpressions = 1;
+    rRes.startOffset.realloc(1);
+    rRes.endOffset.realloc(1);
+    rRes.startOffset[0] = nStartOffset;
+    rRes.endOffset[0] = nEndOffset;
+}
+}
+
+SearchResult TextSearch::WildcardSrchFrwrd( const OUString& searchStr, sal_Int32 nStartPos, sal_Int32 nEndPos )
+        throw(RuntimeException)
+{
+    SearchResult aRes;
+    aRes.subRegExpressions = 0;     // no match
+    sal_Int32 nStartOffset = nStartPos;
+    sal_Int32 nEndOffset = nEndPos;
+
+    const sal_Int32 nStringLen = searchStr.getLength();
+
+    // Forward nStartPos inclusive, nEndPos exclusive, but allow for empty
+    // string match with [0,0).
+    if (nStartPos < 0 || nEndPos > nStringLen || nEndPos < nStartPos || nStartPos > nStringLen ||
+            (nStartPos == nStringLen && (nStringLen != 0 || nStartPos != nEndPos)))
+        return aRes;
+
+    const OUString& rPattern = (bUsePrimarySrchStr ? sSrchStr : sSrchStr2);
+    const sal_Int32 nPatternLen = rPattern.getLength();
+
+    // Handle special cases empty pattern and/or string outside of the loop to
+    // not add performance penalties there and simplify.
+    if (nStartPos == nEndPos)
+    {
+        sal_Int32 i = 0;
+        while (i < nPatternLen && rPattern[i] == '*')
+            ++i;
+        if (i == nPatternLen)
+            setWildcardMatch( aRes, nStartOffset, nEndOffset);
+        return aRes;
+    }
+
+    // Empty pattern does not match any non-empty string.
+    if (!nPatternLen)
+        return aRes;
+
+    bool bRewind = false;
+    sal_uInt32 cPattern = 0;
+    sal_Int32 nPattern = 0;
+    sal_Int32 nAfterFakePattern = nPattern;
+    if (mbWildcardAllowSubstring)
+    {
+        // Fake a leading '*' wildcard.
+        cPattern = '*';
+        bRewind = true;
+        // Assume a non-'*' pattern character follows. If it is a '*' instead
+        // that will be handled in the loop by setting nPat.
+        sal_uInt32 cu = rPattern.iterateCodePoints( &nAfterFakePattern);
+        if (cu == mcWildcardEscapeChar && mcWildcardEscapeChar && nAfterFakePattern < nPatternLen)
+            rPattern.iterateCodePoints( &nAfterFakePattern);
+    }
+
+    sal_Int32 nString = nStartPos, nPat = -1, nStr = -1;
+    sal_uInt32 cPatternAfterAsterisk = 0;
+    bool bEscaped = false, bEscapedAfterAsterisk = false;
+
+    // The loop code tries to avoid costly calls to iterateCodePoints() when
+    // possible.
+
+    do
+    {
+        if (bRewind)
+        {
+            // Reuse cPattern after '*', nPattern was correspondingly
+            // incremented to point behind cPattern.
+            bRewind = false;
+        }
+        else if (nPattern < nPatternLen)
+        {
+            // nPattern will be incremented by iterateCodePoints().
+            cPattern = rPattern.iterateCodePoints( &nPattern);
+            if (cPattern == mcWildcardEscapeChar && mcWildcardEscapeChar && nPattern < nPatternLen)
+            {
+                bEscaped = true;
+                cPattern = rPattern.iterateCodePoints( &nPattern);
+            }
+        }
+        else
+        {
+            // A trailing '*' is handled below. If the pattern is consumed and
+            // substring match allowed we're good.
+            if (mbWildcardAllowSubstring)
+                setWildcardMatch( aRes, nStartOffset, nString);
+            return aRes;
+        }
+
+        if (cPattern == '*' && !bEscaped)
+        {
+            // '*' is one code unit, so not using iterateCodePoints() is ok.
+            while (nPattern < nPatternLen && rPattern[nPattern] == '*')
+                ++nPattern;
+
+            if (nPattern >= nPatternLen)
+            {
+                // Last pattern is '*', remaining string matches.
+                setWildcardMatch( aRes, nStartOffset, nEndOffset);
+                return aRes;
+            }
+
+            // cPattern will be the next non-'*' character, nPattern
+            // incremented.
+            cPattern = rPattern.iterateCodePoints( &nPattern);
+            if (cPattern == mcWildcardEscapeChar && mcWildcardEscapeChar && nPattern < nPatternLen)
+            {
+                bEscaped = true;
+                cPattern = rPattern.iterateCodePoints( &nPattern);
+            }
+
+            cPatternAfterAsterisk = cPattern;
+            bEscapedAfterAsterisk = bEscaped;
+            nPat = nPattern;    // Remember position of pattern behind '*', already incremented.
+            nStr = nString;     // Remember the current string to be matched.
+        }
+
+        if (nString >= nEndPos)
+            // Whatever follows in pattern, string will not match.
+            return aRes;
+
+        // nString will be incremented by iterateCodePoints().
+        sal_uInt32 cString = searchStr.iterateCodePoints( &nString);
+
+        if ((cPattern != '?' || bEscaped) && cPattern != cString)
+        {
+            if (nPat == -1)
+                // Non-match already without any '*' pattern.
+                return aRes;
+
+            bRewind = true;
+            nPattern = nPat;                    // Rewind pattern to character behind '*', already incremented.
+            cPattern = cPatternAfterAsterisk;
+            bEscaped = bEscapedAfterAsterisk;
+            searchStr.iterateCodePoints( &nStr);
+            nString = nStr;                     // Restore incremented remembered string position.
+            if (nPat == nAfterFakePattern)
+            {
+                // Next start offset will be the next character.
+                nStartOffset = nString;
+            }
+        }
+        else
+        {
+            // An unescaped '?' pattern matched any character, or characters
+            // matched. Reset only escaped state.
+            bEscaped = false;
+        }
+    }
+    while (nString < nEndPos);
+
+    if (bRewind)
+        return aRes;
+
+    // Eat trailing '*' pattern that matches anything, including nothing.
+    // '*' is one code unit, so not using iterateCodePoints() is ok.
+    while (nPattern < nPatternLen && rPattern[nPattern] == '*')
+        ++nPattern;
+
+    if (nPattern == nPatternLen)
+        setWildcardMatch( aRes, nStartOffset, nEndOffset);
+    return aRes;
+}
+
+SearchResult TextSearch::WildcardSrchBkwrd( const OUString& searchStr, sal_Int32 nStartPos, sal_Int32 nEndPos )
+        throw(RuntimeException)
+{
+    SearchResult aRes;
+    aRes.subRegExpressions = 0;     // no match
+
+    sal_Int32 nStartOffset = nStartPos;
+    sal_Int32 nEndOffset = nEndPos;
+
+    const sal_Int32 nStringLen = searchStr.getLength();
+
+    // Backward nStartPos exclusive, nEndPos inclusive, but allow for empty
+    // string match with (0,0].
+    if (nStartPos > nStringLen || nEndPos < 0 || nStartPos < nEndPos || nEndPos > nStringLen ||
+            (nEndPos == nStringLen && (nStringLen != 0 || nStartPos != nEndPos)))
+        return aRes;
+
+    const OUString& rPattern = (bUsePrimarySrchStr ? sSrchStr : sSrchStr2);
+    sal_Int32 nPatternLen = rPattern.getLength();
+
+    // Handle special cases empty pattern and/or string outside of the loop to
+    // not add performance penalties there and simplify.
+    if (nStartPos == nEndPos)
+    {
+        sal_Int32 i = 0;
+        while (i < nPatternLen && rPattern[i] == '*')
+            ++i;
+        if (i == nPatternLen)
+            setWildcardMatch( aRes, nStartOffset, nEndOffset);
+        return aRes;
+    }
+
+    // Empty pattern does not match any non-empty string.
+    if (!nPatternLen)
+        return aRes;
+
+    // Reverse escaped patterns to ease the handling of escapes, keeping escape
+    // and following character as one sequence in backward direction.
+    if ((bUsePrimarySrchStr && maWildcardReversePattern.isEmpty()) ||
+            (!bUsePrimarySrchStr && maWildcardReversePattern2.isEmpty()))
+    {
+        OUStringBuffer aPatternBuf( rPattern);
+        sal_Int32 nIndex = 0;
+        while (nIndex < nPatternLen)
+        {
+            const sal_Int32 nOld = nIndex;
+            const sal_uInt32 cu = rPattern.iterateCodePoints( &nIndex);
+            if (cu == mcWildcardEscapeChar)
+            {
+                if (nIndex < nPatternLen)
+                {
+                    if (nIndex - nOld == 1)
+                    {
+                        // Simply move code units, we already memorized the one
+                        // in 'cu'.
+                        const sal_Int32 nOld2 = nIndex;
+                        rPattern.iterateCodePoints( &nIndex);
+                        for (sal_Int32 i=0; i < nIndex - nOld2; ++i)
+                            aPatternBuf[nOld+i] = rPattern[nOld2+i];
+                        aPatternBuf[nIndex-1] = static_cast<sal_Unicode>(cu);
+                    }
+                    else
+                    {
+                        // Copy the escape character code units first in the
+                        // unlikely case that it would not be of BMP.
+                        assert(nIndex - nOld == 2);  // it's UTF-16, so..
+                        sal_Unicode buf[2];
+                        buf[0] = rPattern[nOld];
+                        buf[1] = rPattern[nOld+1];
+                        const sal_Int32 nOld2 = nIndex;
+                        rPattern.iterateCodePoints( &nIndex);
+                        for (sal_Int32 i=0; i < nIndex - nOld2; ++i)
+                            aPatternBuf[nOld+i] = rPattern[nOld2+i];
+                        aPatternBuf[nIndex-2] = buf[0];
+                        aPatternBuf[nIndex-1] = buf[1];
+                    }
+                }
+                else
+                {
+                    // Trailing escape would become leading escape, do what?
+                    // Eliminate.
+                    aPatternBuf.remove( nOld, nIndex - nOld);
+                }
+            }
+        }
+        if (bUsePrimarySrchStr)
+            maWildcardReversePattern = aPatternBuf.makeStringAndClear();
+        else
+            maWildcardReversePattern2 = aPatternBuf.makeStringAndClear();
+    }
+    const OUString& rReversePattern = (bUsePrimarySrchStr ? maWildcardReversePattern : maWildcardReversePattern2);
+    nPatternLen = rReversePattern.getLength();
+
+    bool bRewind = false;
+    sal_uInt32 cPattern = 0;
+    sal_Int32 nPattern = nPatternLen;
+    sal_Int32 nAfterFakePattern = nPattern;
+    if (mbWildcardAllowSubstring)
+    {
+        // Fake a trailing '*' wildcard.
+        cPattern = '*';
+        bRewind = true;
+        // Assume a non-'*' pattern character follows. If it is a '*' instead
+        // that will be handled in the loop by setting nPat.
+        sal_uInt32 cu = rReversePattern.iterateCodePoints( &nAfterFakePattern, -1);
+        if (cu == mcWildcardEscapeChar && mcWildcardEscapeChar && nAfterFakePattern > 0)
+            rReversePattern.iterateCodePoints( &nAfterFakePattern, -1);
+    }
+
+    sal_Int32 nString = nStartPos, nPat = -1, nStr = -1;
+    sal_uInt32 cPatternAfterAsterisk = 0;
+    bool bEscaped = false, bEscapedAfterAsterisk = false;
+
+    // The loop code tries to avoid costly calls to iterateCodePoints() when
+    // possible.
+
+    do
+    {
+        if (bRewind)
+        {
+            // Reuse cPattern after '*', nPattern was correspondingly
+            // decremented to point before cPattern.
+            bRewind = false;
+        }
+        else if (nPattern > 0)
+        {
+            // nPattern will be decremented by iterateCodePoints().
+            cPattern = rReversePattern.iterateCodePoints( &nPattern, -1);
+            if (cPattern == mcWildcardEscapeChar && mcWildcardEscapeChar && nPattern > 0)
+            {
+                bEscaped = true;
+                cPattern = rReversePattern.iterateCodePoints( &nPattern, -1);
+            }
+        }
+        else
+        {
+            // A leading '*' is handled below. If the pattern is consumed and
+            // substring match allowed we're good.
+            if (mbWildcardAllowSubstring)
+                setWildcardMatch( aRes, nStartOffset, nString);
+            return aRes;
+        }
+
+        if (cPattern == '*' && !bEscaped)
+        {
+            // '*' is one code unit, so not using iterateCodePoints() is ok.
+            while (nPattern > 0 && rReversePattern[nPattern-1] == '*')
+                --nPattern;
+
+            if (nPattern <= 0)
+            {
+                // First pattern is '*', remaining string matches.
+                setWildcardMatch( aRes, nStartOffset, nEndOffset);
+                return aRes;
+            }
+
+            // cPattern will be the previous non-'*' character, nPattern
+            // decremented.
+            cPattern = rReversePattern.iterateCodePoints( &nPattern, -1);
+            if (cPattern == mcWildcardEscapeChar && mcWildcardEscapeChar && nPattern > 0)
+            {
+                bEscaped = true;
+                cPattern = rReversePattern.iterateCodePoints( &nPattern, -1);
+            }
+
+            cPatternAfterAsterisk = cPattern;
+            bEscapedAfterAsterisk = bEscaped;
+            nPat = nPattern;    // Remember position of pattern before '*', already decremented.
+            nStr = nString;     // Remember the current string to be matched.
+        }
+
+        if (nString <= nEndPos)
+            // Whatever leads in pattern, string will not match.
+            return aRes;
+
+        // nString will be decremented by iterateCodePoints().
+        sal_uInt32 cString = searchStr.iterateCodePoints( &nString, -1);
+
+        if ((cPattern != '?' || bEscaped) && cPattern != cString)
+        {
+            if (nPat == -1)
+                // Non-match already without any '*' pattern.
+                return aRes;
+
+            bRewind = true;
+            nPattern = nPat;                    // Rewind pattern to character before '*', already decremented.
+            cPattern = cPatternAfterAsterisk;
+            bEscaped = bEscapedAfterAsterisk;
+            searchStr.iterateCodePoints( &nStr, -1);
+            nString = nStr;                     // Restore decremented remembered string position.
+            if (nPat == nAfterFakePattern)
+            {
+                // Next start offset will be this character (exclusive).
+                nStartOffset = nString;
+            }
+        }
+        else
+        {
+            // An unescaped '?' pattern matched any character, or characters
+            // matched. Reset only escaped state.
+            bEscaped = false;
+        }
+    }
+    while (nString > nEndPos);
+
+    if (bRewind)
+        return aRes;
+
+    // Eat leading '*' pattern that matches anything, including nothing.
+    // '*' is one code unit, so not using iterateCodePoints() is ok.
+    while (nPattern > 0 && rReversePattern[nPattern-1] == '*')
+        --nPattern;
+
+    if (nPattern == 0)
+        setWildcardMatch( aRes, nStartOffset, nEndOffset);
+    return aRes;
+}
+
+
 static const sal_Char cSearchImpl[] = "com.sun.star.util.TextSearch_i18n";
 
 static uno::Sequence< OUString > getServiceName_Static()
diff --git a/i18npool/source/search/textsearch.hxx b/i18npool/source/search/textsearch.hxx
index ae189c2..49c1dfb 100644
--- a/i18npool/source/search/textsearch.hxx
+++ b/i18npool/source/search/textsearch.hxx
@@ -106,6 +106,20 @@ class TextSearch: public cppu::WeakImplHelper
                                 sal_Int32 startPos, sal_Int32 endPos )
                             throw(css::uno::RuntimeException);
 
+    // Members and methods for the wildcard search
+    OUString    maWildcardReversePattern;
+    OUString    maWildcardReversePattern2;
+    sal_uInt32  mcWildcardEscapeChar;
+    bool        mbWildcardAllowSubstring;
+    css::util::SearchResult SAL_CALL
+        WildcardSrchFrwrd( const OUString& searchStr,
+                                sal_Int32 startPos, sal_Int32 endPos )
+                            throw(css::uno::RuntimeException);
+    css::util::SearchResult SAL_CALL
+        WildcardSrchBkwrd( const OUString& searchStr,
+                                sal_Int32 startPos, sal_Int32 endPos )
+                            throw(css::uno::RuntimeException);
+
     bool IsDelimiter( const OUString& rStr, sal_Int32 nPos ) const;
 
     bool checkCTLStart, checkCTLEnd;


More information about the Libreoffice-commits mailing list