[Libreoffice-commits] core.git: Branch 'libreoffice-7-0' - i18npool/qa i18npool/source

László Németh (via logerrit) logerrit at kemper.freedesktop.org
Tue Nov 17 08:56:57 UTC 2020


 i18npool/qa/cppunit/test_textsearch.cxx |  115 ++++++++++++++++++++++++++++++++
 i18npool/source/search/textsearch.cxx   |   30 ++++++++
 i18npool/source/search/textsearch.hxx   |    3 
 3 files changed, 148 insertions(+)

New commits:
commit 853c9199444cf893583992bede981c494da21ceb
Author:     László Németh <nemeth at numbertext.org>
AuthorDate: Thu Nov 12 11:33:05 2020 +0100
Commit:     Xisco Fauli <xiscofauli at libreoffice.org>
CommitDate: Tue Nov 17 09:56:26 2020 +0100

    tdf#117643 Writer: fix apostrophe search regression
    
    During text search, ASCII apostrophe ' (U+0027)
    of the search term matches the typographic
    apostrophe ’ (U+2019) of the text, too.
    
    There was a UX regression in document editing from
    commit e6fade1ce133039d28369751b77ac8faff6e40cb
    (tdf#38395 enable smart apostrophe replacement by default),
    because the Find and Replace window and the Find toolbar
    don't replace the ASCII apostrophe, so the search term
    no longer matched the text (which now contains the automatically
    replaced typographic apostrophes), as it did before that commit.
    
    Regex search hasn't been modified, i.e. finding U+2019
    still requires a search term with U+2019.
    
    The typographic apostrophes of a search term match
    ASCII apostrophes of the text only if the search term
    also contains an ASCII apostrophe.
    
    Note: as a more sophisticated solution, it's possible to
    add a new default transliteration option for this later.
    
    Change-Id: I5121edbef5cf34fdd5b5f9ba3c046a06329a756a
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/105717
    Tested-by: Jenkins
    Reviewed-by: László Németh <nemeth at numbertext.org>
    (cherry picked from commit d40f2d02df26e216f367b5da3f9546b73f250469)
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/105858

diff --git a/i18npool/qa/cppunit/test_textsearch.cxx b/i18npool/qa/cppunit/test_textsearch.cxx
index b2175b21bfa5..22ded53cda99 100644
--- a/i18npool/qa/cppunit/test_textsearch.cxx
+++ b/i18npool/qa/cppunit/test_textsearch.cxx
@@ -38,11 +38,13 @@ public:
     void testICU();
     void testSearches();
     void testWildcardSearch();
+    void testApostropheSearch();
 
     CPPUNIT_TEST_SUITE(TestTextSearch);
     CPPUNIT_TEST(testICU);
     CPPUNIT_TEST(testSearches);
     CPPUNIT_TEST(testWildcardSearch);
+    CPPUNIT_TEST(testApostropheSearch);
     CPPUNIT_TEST_SUITE_END();
 private:
     uno::Reference<util::XTextSearch> m_xSearch;
@@ -266,6 +268,119 @@ void TestTextSearch::testWildcardSearch()
     CPPUNIT_ASSERT((aRes.startOffset[0] == 6) && (aRes.endOffset[0] == 0));
 }
 
+void TestTextSearch::testApostropheSearch()
+{
+    // A) an ASCII apostrophe (U+0027) in searchString also finds a typographic apostrophe (U+2019) in the text
+    OUString str( u"It\u2019s an apostrophe." );
+    sal_Int32 startPos = 0, endPos = str.getLength();
+
+    // set options: non-regex (ABSOLUTE) search for a single ASCII apostrophe
+    util::SearchOptions aOptions;
+    aOptions.algorithmType = util::SearchAlgorithms_ABSOLUTE;
+    aOptions.searchFlag = util::SearchFlags::ALL_IGNORE_CASE;
+    aOptions.searchString = "'";
+    m_xSearch->setOptions( aOptions );
+
+    util::SearchResult aRes;
+
+    // search forward: the match must cover the U+2019 at index 2
+    aRes = m_xSearch->searchForward( str, startPos, endPos );
+    // This was 0 (no match) before the fix.
+    CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
+    CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(2), aRes.startOffset[0] );
+    CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(3), aRes.endOffset[0] );
+
+    // search backwards: same match, start/end offsets are expected in reverse order
+    aRes = m_xSearch->searchBackward( str, endPos, startPos );
+    // This was 0 (no match) before the fix.
+    CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
+    CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(3), aRes.startOffset[0] );
+    CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(2), aRes.endOffset[0] );
+
+    // repeat the same checks with transliteration (IGNORE_CASE | IGNORE_WIDTH) enabled
+    aOptions.transliterateFlags = static_cast<int>(TransliterationFlags::IGNORE_CASE
+                                | TransliterationFlags::IGNORE_WIDTH);
+    m_xSearch->setOptions(aOptions);
+
+    // search forward (with transliteration)
+    aRes = m_xSearch->searchForward( str, startPos, endPos );
+    // This was 0 (no match) before the fix.
+    CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
+    CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(2), aRes.startOffset[0] );
+    CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(3), aRes.endOffset[0] );
+
+    // search backwards (with transliteration)
+    aRes = m_xSearch->searchBackward( str, endPos, startPos );
+    // This was 0 (no match) before the fix.
+    CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
+    CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(3), aRes.startOffset[0] );
+    CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(2), aRes.endOffset[0] );
+
+    // B) search ASCII apostrophe in a text with ASCII apostrophes (U+2019 in str replaced by U+0027)
+    str = str.replace(u'\u2019', '\'');
+
+    // search forward: plain ASCII-to-ASCII match at index 2
+    aRes = m_xSearch->searchForward( str, startPos, endPos );
+    CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
+    CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(2), aRes.startOffset[0] );
+    CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(3), aRes.endOffset[0] );
+
+    // search backwards: same match, offsets in reverse order
+    aRes = m_xSearch->searchBackward( str, endPos, startPos );
+    CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
+    CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(3), aRes.startOffset[0] );
+    CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(2), aRes.endOffset[0] );
+
+    // C) a search term with only U+2019 must not match ASCII apostrophes in the text (no result)
+    aOptions.searchString = OUString(u"\u2019");
+    m_xSearch->setOptions( aOptions );
+
+    aRes = m_xSearch->searchForward( str, startPos, endPos );
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.subRegExpressions);
+
+    aRes = m_xSearch->searchBackward( str, endPos, startPos );
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.subRegExpressions);
+
+    // D) search typographic apostrophe in a text with typographic apostrophes (U+0027 in str replaced by U+2019)
+    str = str.replace('\'', u'\u2019');
+
+    // search forward: U+2019-to-U+2019 match at index 2
+    aRes = m_xSearch->searchForward( str, startPos, endPos );
+    CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
+    CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(2), aRes.startOffset[0] );
+    CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(3), aRes.endOffset[0] );
+
+    // search backwards: same match, offsets in reverse order
+    aRes = m_xSearch->searchBackward( str, endPos, startPos );
+    CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
+    CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(3), aRes.startOffset[0] );
+    CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(2), aRes.endOffset[0] );
+
+    // E) search mixed apostrophes (U+0027 U+2019) in a text with mixed apostrophes in the opposite order:
+    aOptions.searchString = OUString(u"'\u2019");
+    m_xSearch->setOptions( aOptions );
+    str = u"test: \u2019'";
+
+    // search forward (str was reassigned, so its current length is used instead of endPos)
+    aRes = m_xSearch->searchForward( str, startPos, str.getLength());
+    CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
+
+    // search backwards over the same range
+    aRes = m_xSearch->searchBackward( str, str.getLength(), startPos );
+    CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
+
+    // F) search mixed apostrophes (U+0027 U+2019) in a text with only ASCII apostrophes:
+    str = u"test: ''";
+
+    // search forward: the U+2019 of the search term must match an ASCII apostrophe here
+    aRes = m_xSearch->searchForward( str, startPos, str.getLength());
+    CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
+
+    // search backwards over the same range
+    aRes = m_xSearch->searchBackward( str, str.getLength(), startPos );
+    CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
+}
+
 void TestTextSearch::setUp()
 {
     BootstrapFixtureBase::setUp();
diff --git a/i18npool/source/search/textsearch.cxx b/i18npool/source/search/textsearch.cxx
index 1c74a085a03d..bb7a2fffac40 100644
--- a/i18npool/source/search/textsearch.cxx
+++ b/i18npool/source/search/textsearch.cxx
@@ -130,6 +130,8 @@ void TextSearch::setOptions2( const SearchOptions2& rOptions )
     maWildcardReversePattern.clear();
     maWildcardReversePattern2.clear();
     TransliterationFlags transliterateFlags = static_cast<TransliterationFlags>(aSrchPara.transliterateFlags);
+    bSearchApostrophe = false;
+    bool bReplaceApostrophe = false;
     if (aSrchPara.AlgorithmType2 == SearchAlgorithms2::REGEXP)
     {
         // RESrchPrepare will consider aSrchPara.transliterateFlags when
@@ -140,6 +142,11 @@ void TextSearch::setOptions2( const SearchOptions2& rOptions )
         // match is not case-altered, leave case-(in)sensitive to regex engine.
         transliterateFlags &= ~TransliterationFlags::IGNORE_CASE;
     }
+    else if ( aSrchPara.searchString.indexOf('\'') > - 1 )
+    {
+        bSearchApostrophe = true;
+        bReplaceApostrophe = aSrchPara.searchString.indexOf(u'\u2019') > -1;
+    }
 
     // Create Transliteration class
     if( isSimpleTrans( transliterateFlags) )
@@ -217,6 +224,9 @@ void TextSearch::setOptions2( const SearchOptions2& rOptions )
     checkCTLEnd = (xBreak.is() && (xBreak->getScriptType(sSrchStr,
                     sSrchStr.getLength()-1) == ScriptType::COMPLEX));
 
+    if ( bReplaceApostrophe )
+        sSrchStr = sSrchStr.replace(u'\u2019', '\'');
+
     // Take the new SearchOptions2::AlgorithmType2 field and ignore
     // SearchOptions::algorithmType
     switch( aSrchPara.AlgorithmType2)
@@ -315,6 +325,10 @@ SearchResult TextSearch::searchForward( const OUString& searchStr, sal_Int32 sta
 
     OUString in_str(searchStr);
 
+    // in non-regex mode, allow searching typographical apostrophe with the ASCII one
+    // to avoid regression after using automatic conversion to U+2019 during typing in Writer
+    bool bReplaceApostrophe = bSearchApostrophe && in_str.indexOf(u'\u2019') > -1;
+
     bUsePrimarySrchStr = true;
 
     if ( xTranslit.is() )
@@ -344,6 +358,9 @@ SearchResult TextSearch::searchForward( const OUString& searchStr, sal_Int32 sta
         css::uno::Sequence<sal_Int32> offset(nInEndPos - nInStartPos);
         in_str = xTranslit->transliterate(searchStr, nInStartPos, nInEndPos - nInStartPos, offset);
 
+        if ( bReplaceApostrophe )
+            in_str = in_str.replace(u'\u2019', '\'');
+
         // JP 20.6.2001: also the start and end positions must be corrected!
         sal_Int32 newStartPos =
             (startPos == 0) ? 0 : FindPosInSeq_Impl( offset, startPos );
@@ -385,6 +402,9 @@ SearchResult TextSearch::searchForward( const OUString& searchStr, sal_Int32 sta
     }
     else
     {
+        if ( bReplaceApostrophe )
+            in_str = in_str.replace(u'\u2019', '\'');
+
         sres = (this->*fnForward)( in_str, startPos, endPos );
     }
 
@@ -440,6 +460,10 @@ SearchResult TextSearch::searchBackward( const OUString& searchStr, sal_Int32 st
 
     OUString in_str(searchStr);
 
+    // in non-regex mode, allow searching typographical apostrophe with the ASCII one
+    // to avoid regression after using automatic conversion to U+2019 during typing in Writer
+    bool bReplaceApostrophe = bSearchApostrophe && in_str.indexOf(u'\u2019') > -1;
+
     bUsePrimarySrchStr = true;
 
     if ( xTranslit.is() )
@@ -448,6 +472,9 @@ SearchResult TextSearch::searchBackward( const OUString& searchStr, sal_Int32 st
         css::uno::Sequence<sal_Int32> offset(startPos - endPos);
         in_str = xTranslit->transliterate( searchStr, endPos, startPos - endPos, offset );
 
+        if ( bReplaceApostrophe )
+            in_str = in_str.replace(u'\u2019', '\'');
+
         // JP 20.6.2001: also the start and end positions must be corrected!
         sal_Int32 const newStartPos = (startPos < searchStr.getLength())
             ? FindPosInSeq_Impl( offset, startPos )
@@ -493,6 +520,9 @@ SearchResult TextSearch::searchBackward( const OUString& searchStr, sal_Int32 st
     }
     else
     {
+        if ( bReplaceApostrophe )
+            in_str = in_str.replace(u'\u2019', '\'');
+
         sres = (this->*fnBackward)( in_str, startPos, endPos );
     }
 
diff --git a/i18npool/source/search/textsearch.hxx b/i18npool/source/search/textsearch.hxx
index aa4c8f522f86..fac6e655c03d 100644
--- a/i18npool/source/search/textsearch.hxx
+++ b/i18npool/source/search/textsearch.hxx
@@ -68,6 +68,9 @@ class TextSearch: public cppu::WeakImplHelper
     FnSrch fnForward;
     FnSrch fnBackward;
 
+    // to fix UX regression, U+0027 matches also U+2019 in non-regex search
+    bool bSearchApostrophe;
+
     // Members and methods for the normal (Boyer-Moore) search
     std::unique_ptr<TextSearchJumpTable> pJumpTable;
     std::unique_ptr<TextSearchJumpTable> pJumpTable2;


More information about the Libreoffice-commits mailing list