[Libreoffice-commits] core.git: sc/inc sc/source

Libreoffice Gerrit user logerrit at kemper.freedesktop.org
Thu Nov 29 10:31:10 UTC 2018


 sc/inc/scfuncs.hrc               |    4 
 sc/source/core/tool/interpr1.cxx |  206 ++++++++++++++++++++++++++-------------
 2 files changed, 140 insertions(+), 70 deletions(-)

New commits:
commit e3af4947fd4b8d1411212775e8ffe42e330364c3
Author:     Eike Rathke <erack at redhat.com>
AuthorDate: Thu Nov 29 01:11:57 2018 +0100
Commit:     Eike Rathke <erack at redhat.com>
CommitDate: Thu Nov 29 11:30:40 2018 +0100

    Support occurrence number as REGEX() 4th argument, tdf#113977 follow-up
    
    REGEX( Text ; Expression [ ; [ Replacement ] [ ; Flags|Occurrence ] ] )
    
    REGEX(Text;Expression) extracts the first match of Expression in
    Text. If there is no match, #N/A is returned.
    
    REGEX(Text;Expression;Replacement) replaces the first match of
    Expression in Text with Replacement and returns the resulting text
    (the match itself is not extracted). If there is no match, Text is
    returned unmodified.
    
    REGEX(Text;Expression;Replacement;"g") replaces all matches of
    Expression in Text with Replacement and returns the resulting text
    (the matches are not extracted). If there is no match, Text is
    returned unmodified.
    
    REGEX(Text;Expression;;Occurrence) extracts the n-th match of
    Expression in Text. If there is no n-th match, #N/A is returned.
    If Occurrence is 0, Text is returned unmodified.
    
    REGEX(Text;Expression;Replacement;Occurrence) replaces the n-th
    match of Expression in Text with Replacement and returns the
    resulting text (the match is not extracted). If there is no n-th
    match, or if Occurrence is 0, Text is returned unmodified.
    
    Change-Id: Iadb705e4c76415c57bf510489410ec029344cca7
    Reviewed-on: https://gerrit.libreoffice.org/64199
    Reviewed-by: Eike Rathke <erack at redhat.com>
    Tested-by: Jenkins

diff --git a/sc/inc/scfuncs.hrc b/sc/inc/scfuncs.hrc
index e09a12f4e4c7..c08df5adc072 100644
--- a/sc/inc/scfuncs.hrc
+++ b/sc/inc/scfuncs.hrc
@@ -3826,8 +3826,8 @@ const char* SC_OPCODE_REGEX_ARY[] =
     NC_("SC_OPCODE_REGEX", "The regular expression pattern to be matched."),
     NC_("SC_OPCODE_REGEX", "Replacement"),
     NC_("SC_OPCODE_REGEX", "The replacement text and references to capture groups."),
-    NC_("SC_OPCODE_REGEX", "Flags"),
-    NC_("SC_OPCODE_REGEX", "Text specifying option flags, \"g\" for global replacement.")
+    NC_("SC_OPCODE_REGEX", "Flags or Occurrence"),
+    NC_("SC_OPCODE_REGEX", "Text specifying option flags, \"g\" for global replacement. Or number of occurrence to match or replace.")
 };
 
 // -=*# Resource for function BASE #*=-
diff --git a/sc/source/core/tool/interpr1.cxx b/sc/source/core/tool/interpr1.cxx
index c659f0f93ae3..a07f5a7ce1c6 100644
--- a/sc/source/core/tool/interpr1.cxx
+++ b/sc/source/core/tool/interpr1.cxx
@@ -9226,17 +9226,48 @@ void ScInterpreter::ScSearch()
 
 void ScInterpreter::ScRegex()
 {
-    sal_uInt8 nParamCount = GetByte();
-    if (MustHaveParamCount( nParamCount, 2, 4))
+    const sal_uInt8 nParamCount = GetByte();
+    if (!MustHaveParamCount( nParamCount, 2, 4))
+        return;
+
+    // Flags are supported only for replacement, search match flags can be
+    // individually and much more flexible set in the regular expression
+    // pattern using (?ismwx-ismwx)
+    bool bGlobalReplacement = false;
+    sal_Int32 nOccurrence = 1;  // default first occurrence, if any
+    if (nParamCount == 4)
     {
-        // Flags are supported only for replacement, search match flags can be
-        // individually and much more flexible set in the regular expression
-        // pattern using (?ismwx-ismwx)
-        bool bGlobalReplacement = false;
-        if (nParamCount == 4)
+        // Argument can be either string or double.
+        double fOccurrence;
+        svl::SharedString aFlagsString;
+        bool bDouble;
+        if (!IsMissing())
+            bDouble = GetDoubleOrString( fOccurrence, aFlagsString);
+        else
+        {
+            // For an omitted argument keep the default.
+            PopError();
+            bDouble = true;
+            fOccurrence = nOccurrence;
+        }
+        if (nGlobalError != FormulaError::NONE)
+        {
+            PushError( nGlobalError);
+            return;
+        }
+        if (bDouble)
+        {
+            if (!CheckStringPositionArgument( fOccurrence))
+            {
+                PushError( FormulaError::IllegalArgument);
+                return;
+            }
+            nOccurrence = static_cast<sal_Int32>(fOccurrence);
+        }
+        else
         {
+            const OUString aFlags( aFlagsString.getString());
             // Empty flags string is valid => no flag set.
-            OUString aFlags( GetString().getString());
             if (aFlags.getLength() > 1)
             {
                 // Only one flag supported.
@@ -9255,87 +9286,126 @@ void ScInterpreter::ScRegex()
                 }
             }
         }
+    }
 
-        bool bReplacement = false;
-        OUString aReplacement;
-        if (nParamCount >= 3)
+    bool bReplacement = false;
+    OUString aReplacement;
+    if (nParamCount >= 3)
+    {
+        // A missing argument is not an empty string to replace the match.
+        // nOccurrence==0 forces no replacement, so simply discard the
+        // argument.
+        if (IsMissing() || nOccurrence == 0)
+            PopError();
+        else
         {
-            // A missing argument is not an empty string to replace the match.
-            if (IsMissing())
-                Pop();
-            else
-            {
-                aReplacement = GetString().getString();
-                bReplacement = true;
-            }
+            aReplacement = GetString().getString();
+            bReplacement = true;
         }
-        // If bGlobalReplacement==true and bReplacement==false then
-        // bGlobalReplacement is silently ignored.
+    }
+    // If bGlobalReplacement==true and bReplacement==false then
+    // bGlobalReplacement is silently ignored.
 
-        OUString aExpression = GetString().getString();
-        OUString aText = GetString().getString();
+    OUString aExpression = GetString().getString();
+    OUString aText = GetString().getString();
 
-        if (nGlobalError != FormulaError::NONE)
-        {
-            PushError( nGlobalError);
-            return;
-        }
+    if (nGlobalError != FormulaError::NONE)
+    {
+        PushError( nGlobalError);
+        return;
+    }
 
-        const icu::UnicodeString aIcuExpression(
-                reinterpret_cast<const UChar*>(aExpression.getStr()), aExpression.getLength());
-        UErrorCode status = U_ZERO_ERROR;
-        icu::RegexMatcher aRegexMatcher( aIcuExpression, 0, status);
+    // 0-th match or replacement is none, return original string early.
+    if (nOccurrence == 0)
+    {
+        PushString( aText);
+        return;
+    }
+
+    const icu::UnicodeString aIcuExpression(
+            reinterpret_cast<const UChar*>(aExpression.getStr()), aExpression.getLength());
+    UErrorCode status = U_ZERO_ERROR;
+    icu::RegexMatcher aRegexMatcher( aIcuExpression, 0, status);
+    if (U_FAILURE(status))
+    {
+        // Invalid regex.
+        PushIllegalArgument();
+        return;
+    }
+    // Guard against pathological patterns, limit steps of engine, see
+    // https://ssl.icu-project.org/apiref/icu4c/classicu_1_1RegexMatcher.html#a6ebcfcab4fe6a38678c0291643a03a00
+    aRegexMatcher.setTimeLimit( 23*1000, status);
+
+    const icu::UnicodeString aIcuText( reinterpret_cast<const UChar*>(aText.getStr()), aText.getLength());
+    aRegexMatcher.reset( aIcuText);
+
+    if (!bReplacement)
+    {
+        // Find n-th occurrence.
+        sal_Int32 nCount = 0;
+        while (aRegexMatcher.find( status) && U_SUCCESS(status) && ++nCount < nOccurrence)
+            ;
         if (U_FAILURE(status))
         {
-            // Invalid regex.
+            // Some error.
             PushIllegalArgument();
             return;
         }
-        // Guard against pathological patterns, limit steps of engine, see
-        // https://ssl.icu-project.org/apiref/icu4c/classicu_1_1RegexMatcher.html#a6ebcfcab4fe6a38678c0291643a03a00
-        aRegexMatcher.setTimeLimit ( 23*1000, status);
-
-        const icu::UnicodeString aIcuText( reinterpret_cast<const UChar*>(aText.getStr()), aText.getLength());
-        aRegexMatcher.reset( aIcuText);
-
-        if (!bReplacement)
+        // n-th match found?
+        if (nCount != nOccurrence)
         {
-            // Find first occurrence.
-            if (!aRegexMatcher.find())
-            {
-                PushError( FormulaError::NotAvailable);
-                return;
-            }
-            // Extract matched text.
-            icu::UnicodeString aMatch( aRegexMatcher.group( status));
-            if (U_FAILURE(status))
-            {
-                // Some error.
-                PushIllegalArgument();
-                return;
-            }
-            OUString aResult( reinterpret_cast<const sal_Unicode*>(aMatch.getBuffer()), aMatch.length());
-            PushString( aResult);
+            PushError( FormulaError::NotAvailable);
             return;
         }
-
-        // Replace first occurrence of match with replacement.
-        const icu::UnicodeString aIcuReplacement(
-                reinterpret_cast<const UChar*>(aReplacement.getStr()), aReplacement.getLength());
-        icu::UnicodeString aReplaced;
-        if (bGlobalReplacement)
-            aReplaced = aRegexMatcher.replaceAll( aIcuReplacement, status);
-        else
-            aReplaced = aRegexMatcher.replaceFirst( aIcuReplacement, status);
+        // Extract matched text.
+        icu::UnicodeString aMatch( aRegexMatcher.group( status));
         if (U_FAILURE(status))
         {
-            // Some error, e.g. extraneous $1 without group.
+            // Some error.
             PushIllegalArgument();
             return;
         }
-        OUString aResult( reinterpret_cast<const sal_Unicode*>(aReplaced.getBuffer()), aReplaced.length());
+        OUString aResult( reinterpret_cast<const sal_Unicode*>(aMatch.getBuffer()), aMatch.length());
         PushString( aResult);
+        return;
+    }
+
+    const icu::UnicodeString aIcuReplacement(
+            reinterpret_cast<const UChar*>(aReplacement.getStr()), aReplacement.getLength());
+    icu::UnicodeString aReplaced;
+    if (bGlobalReplacement)
+        // Replace all occurrences of match with replacement.
+        aReplaced = aRegexMatcher.replaceAll( aIcuReplacement, status);
+    else if (nOccurrence == 1)
+        // Replace first occurrence of match with replacement.
+        aReplaced = aRegexMatcher.replaceFirst( aIcuReplacement, status);
+    else
+    {
+        // Replace n-th occurrence of match with replacement.
+        sal_Int32 nCount = 0;
+        while (aRegexMatcher.find( status) && U_SUCCESS(status))
+        {
+            // XXX NOTE: After several RegexMatcher::find() the
+            // RegexMatcher::appendReplacement() still starts at the
+            // beginning (or after the last appendReplacement() position
+            // which is none here) and copies the original text up to the
+            // current found match and then replaces the found match.
+            if (++nCount == nOccurrence)
+            {
+                aRegexMatcher.appendReplacement( aReplaced, aIcuReplacement, status);
+                break;
+            }
+        }
+        aRegexMatcher.appendTail( aReplaced);
+    }
+    if (U_FAILURE(status))
+    {
+        // Some error, e.g. extraneous $1 without group.
+        PushIllegalArgument();
+        return;
     }
+    OUString aResult( reinterpret_cast<const sal_Unicode*>(aReplaced.getBuffer()), aReplaced.length());
+    PushString( aResult);
 }
 
 void ScInterpreter::ScMid()


More information about the Libreoffice-commits mailing list