[Libreoffice-commits] core.git: sc/source
Libreoffice Gerrit user
logerrit at kemper.freedesktop.org
Tue Oct 30 01:35:51 UTC 2018
sc/source/core/tool/interpr1.cxx | 67 +++++++++++++++++++++++++++------------
1 file changed, 47 insertions(+), 20 deletions(-)
New commits:
commit 9574076674acfed2e7f24a86929c10b7a539a35c
Author: Eike Rathke <erack at redhat.com>
AuthorDate: Mon Oct 29 23:49:22 2018 +0100
Commit: Eike Rathke <erack at redhat.com>
CommitDate: Tue Oct 30 02:35:28 2018 +0100
Rewrite REGEX() to directly use icu::RegexMatcher, tdf#113977
This not only avoids all the overhead of i18npool's TextSearch and
Transliteration efforts (which are useful in the Find&Replace
dialog bu we don't use here) that could possibly even get in the
way how they are implemented, but also gives us direct control
over all possible flags; plus ICU's replace features are more
convenient to use and have better error checking to report back
and there is the advantage that ICU's regex description can be
taken verbatim.
Change-Id: Iaa06c26a3ff6882a0057d1ca92b1605073429bca
Reviewed-on: https://gerrit.libreoffice.org/62531
Reviewed-by: Eike Rathke <erack at redhat.com>
Tested-by: Eike Rathke <erack at redhat.com>
diff --git a/sc/source/core/tool/interpr1.cxx b/sc/source/core/tool/interpr1.cxx
index 6c3012ad2ad9..3123335ea3b9 100644
--- a/sc/source/core/tool/interpr1.cxx
+++ b/sc/source/core/tool/interpr1.cxx
@@ -37,6 +37,7 @@
#include <rtl/ustring.hxx>
#include <sal/log.hxx>
#include <unicode/uchar.h>
+#include <unicode/regex.h>
#include <patattr.hxx>
#include <global.hxx>
@@ -9242,30 +9243,56 @@ void ScInterpreter::ScRegex()
return;
}
- sal_Int32 nPos = 0;
- sal_Int32 nEndPos = aText.getLength();
- utl::SearchParam aParam( aExpression, utl::SearchParam::SearchType::Regexp);
- css::util::SearchResult aResult;
- utl::TextSearch aSearch( aParam, *ScGlobal::pCharClass);
- const bool bMatch = aSearch.SearchForward( aText, &nPos, &nEndPos, &aResult);
- if (!bMatch)
- PushError( FormulaError::NotAvailable);
- else
+ const icu::UnicodeString aIcuExpression(
+ reinterpret_cast<const UChar*>(aExpression.getStr()), aExpression.getLength());
+ UErrorCode status = U_ZERO_ERROR;
+ icu::RegexMatcher aRegexMatcher( aIcuExpression, 0, status);
+ if (U_FAILURE(status))
{
- assert(aResult.subRegExpressions >= 1);
- if (!bReplacement)
- PushString( aText.copy( aResult.startOffset[0], aResult.endOffset[0] - aResult.startOffset[0]));
- else
+ // Invalid regex.
+ PushIllegalArgument();
+ return;
+ }
+ // Guard against pathological patterns, limit steps of engine, see
+ // https://ssl.icu-project.org/apiref/icu4c/classicu_1_1RegexMatcher.html#a6ebcfcab4fe6a38678c0291643a03a00
+ aRegexMatcher.setTimeLimit ( 23*1000, status);
+
+ const icu::UnicodeString aIcuText( reinterpret_cast<const UChar*>(aText.getStr()), aText.getLength());
+ aRegexMatcher.reset( aIcuText);
+
+ if (!bReplacement)
+ {
+ // Find first occurrence.
+ if (!aRegexMatcher.find())
+ {
+ PushError( FormulaError::NotAvailable);
+ return;
+ }
+ // Extract matched text.
+ icu::UnicodeString aMatch( aRegexMatcher.group( status));
+ if (U_FAILURE(status))
{
- /* TODO: global replacement of multiple occurrences, introduce
- * extra parameter with flag 'g'? Loop over positions after
- * nEndPos until none left? How to keep the offsets in sync
- * after replacement? That should be done by
- * ReplaceBackReferences(). */
- aSearch.ReplaceBackReferences( aReplacement, aText, aResult);
- PushString( aReplacement);
+ // Some error.
+ PushIllegalArgument();
+ return;
}
+ OUString aResult( reinterpret_cast<const sal_Unicode*>(aMatch.getBuffer()), aMatch.length());
+ PushString( aResult);
+ return;
+ }
+
+ // Replace first occurrence of match with replacement.
+ const icu::UnicodeString aIcuReplacement(
+ reinterpret_cast<const UChar*>(aReplacement.getStr()), aReplacement.getLength());
+ icu::UnicodeString aReplaced( aRegexMatcher.replaceFirst( aIcuReplacement, status));
+ if (U_FAILURE(status))
+ {
+ // Some error, e.g. extraneous $1 without group.
+ PushIllegalArgument();
+ return;
}
+ OUString aResult( reinterpret_cast<const sal_Unicode*>(aReplaced.getBuffer()), aReplaced.length());
+ PushString( aResult);
}
}
More information about the Libreoffice-commits
mailing list