[Libreoffice-commits] core.git: configmgr/source include/rtl sal/rtl sal/textenc sax/source stoc/source svtools/source tools/source

Wed Sep 13 11:40:43 UTC 2017

configmgr/source/access.cxx                                       |    4 -
 include/rtl/character.hxx                                         |   27 ++++++++++
 sal/rtl/uri.cxx                                                   |    5 -
 sal/rtl/ustrbuf.cxx                                               |    2 
 sal/textenc/tcvtutf8.cxx                                          |    5 -
 sax/source/expatwrap/saxwriter.cxx                                |    4 -
 stoc/source/uriproc/UriSchemeParser_vndDOTsunDOTstarDOTscript.cxx |    4 -
 svtools/source/svrtf/svparser.cxx                                 |    2 
 tools/source/fsys/urlobj.cxx                                      |    6 --
 9 files changed, 40 insertions(+), 19 deletions(-)

New commits:
commit 3f0fba004badec01e536ca0fe2889e7e0bb93d7a
Author: Stephan Bergmann <sbergman at redhat.com>
Date:   Wed Sep 13 13:35:49 2017 +0200

    New rtl::isUnicodeScalarValue, rtl::isSurrogate
    
    There are apparently various places that want to check for a Unicode scalar
    value rather than for a Unicode code point.  Changed those uses of
    rtl::isUnicodeCodePoint where that was obvious.  (For changing
    svtools/source/svrtf/svparser.cxx see 8e0fb74dc01927b60d8b868548ef8fe1d7a80ce3
    "Revert 'svtools: HTML import: don't put lone surrogates in OUString'".)  Other
    uses of rtl::isUnicodeCodePoint might also want to use rtl::isUnicodeScalarValue
    instead.
    
    As a side effect, this change also introduces rtl::isSurrogate, which is useful
    in a few places as well.
    
    Change-Id: I9245f4f98b83877145a4d392f0ddb7c5d824a535

diff --git a/configmgr/source/access.cxx b/configmgr/source/access.cxx
index 6eb692cc5430..0d19af353b49 100644
--- a/configmgr/source/access.cxx
+++ b/configmgr/source/access.cxx
@@ -113,8 +113,8 @@ bool isValidName(OUString const & name, bool setMember) {
     for (sal_Int32 i = 0; i != name.getLength();) {
         sal_uInt32 c = name.iterateCodePoints(&i);
         if ((c < 0x20 && !(c == 0x09 || c == 0x0A || c == 0x0D))
-            || rtl::isHighSurrogate(c) || rtl::isLowSurrogate(c) || c == 0xFFFE
-            || c == 0xFFFF || (!setMember && c == '/'))
+            || rtl::isSurrogate(c) || c == 0xFFFE || c == 0xFFFF
+            || (!setMember && c == '/'))
         {
             return false;
         }
diff --git a/include/rtl/character.hxx b/include/rtl/character.hxx
index b83121a4a6d4..ee26f4ae1f9a 100644
--- a/include/rtl/character.hxx
+++ b/include/rtl/character.hxx
@@ -335,6 +335,20 @@ sal_uInt32 const surrogatesLowLast = 0xDFFF;
 }
 /// @endcond
 
+/** Check for surrogate (either a high or a low surrogate).
+
+    @param code  A Unicode code point; this precondition is asserted.
+
+    @return  True if code is a surrogate code point (0xD800--0xDFFF).
+
+    @since LibreOffice 6.0
+*/
+inline bool isSurrogate(sal_uInt32 code) {
+    assert(isUnicodeCodePoint(code));
+    return code >= detail::surrogatesHighFirst
+        && code <= detail::surrogatesLowLast;
+}
+
 /** Check for high surrogate.
 
     @param code  A Unicode code point.
@@ -433,6 +447,19 @@ inline std::size_t splitSurrogates(sal_uInt32 code, sal_Unicode * output) {
     }
 }
 
+/** Check for Unicode scalar value.
+
+    @param code  An integer; it need not be a Unicode code point.
+
+    @return  True if code is a Unicode code point that is not a surrogate.
+
+    @since LibreOffice 6.0
+*/
+inline bool isUnicodeScalarValue(sal_uInt32 code)
+{
+    return isUnicodeCodePoint(code) && !isSurrogate(code);
+}
+
 }
 
 #endif
diff --git a/sal/rtl/uri.cxx b/sal/rtl/uri.cxx
index 257a0a27abac..57a7102a38eb 100644
--- a/sal/rtl/uri.cxx
+++ b/sal/rtl/uri.cxx
@@ -133,9 +133,8 @@ sal_uInt32 readUcs4(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
                     p += 3;
                     nEncoded |= ((nWeight1 & 3) << 4 | nWeight2) << nShift;
                 }
-                if (bUTF8 && rtl::isUnicodeCodePoint(nEncoded)
-                    && nEncoded >= nMin && !rtl::isHighSurrogate(nEncoded)
-                    && !rtl::isLowSurrogate(nEncoded))
+                if (bUTF8 && rtl::isUnicodeScalarValue(nEncoded)
+                    && nEncoded >= nMin)
                 {
                     *pBegin = p;
                     *pType = EscapeChar;
diff --git a/sal/rtl/ustrbuf.cxx b/sal/rtl/ustrbuf.cxx
index cef31a5da2ea..508c8d7a9c7c 100644
--- a/sal/rtl/ustrbuf.cxx
+++ b/sal/rtl/ustrbuf.cxx
@@ -168,7 +168,7 @@ void rtl_uStringbuffer_insertUtf32(
 {
     sal_Unicode buf[2];
     sal_Int32 len;
-    OSL_ASSERT(rtl::isUnicodeCodePoint(c) && !(c >= 0xD800 && c <= 0xDFFF));
+    OSL_ASSERT(rtl::isUnicodeScalarValue(c));
     if (c <= 0xFFFF) {
         buf[0] = (sal_Unicode) c;
         len = 1;
diff --git a/sal/textenc/tcvtutf8.cxx b/sal/textenc/tcvtutf8.cxx
index d9c3c134a83a..f5ed9dfb995c 100644
--- a/sal/textenc/tcvtutf8.cxx
+++ b/sal/textenc/tcvtutf8.cxx
@@ -170,10 +170,7 @@ sal_Size ImplConvertUtf8ToUnicode(
                 }
                 break;
             case 3:
-                if (nUtf32 < 0x800
-                    || (!bJavaUtf8
-                        && (rtl::isHighSurrogate(nUtf32)
-                            || rtl::isLowSurrogate(nUtf32))))
+                if (nUtf32 < 0x800 || (!bJavaUtf8 && rtl::isSurrogate(nUtf32)))
                 {
                     goto bad_input;
                 }
diff --git a/sax/source/expatwrap/saxwriter.cxx b/sax/source/expatwrap/saxwriter.cxx
index 71d3fba62547..8303afa145d7 100644
--- a/sax/source/expatwrap/saxwriter.cxx
+++ b/sax/source/expatwrap/saxwriter.cxx
@@ -406,7 +406,7 @@ inline bool SaxWriterHelper::convertToXML( const sal_Unicode * pStr,
             OSL_ENSURE( nSurrogate != 0, "lone 2nd Unicode surrogate" );
 
             nSurrogate = ( nSurrogate << 10 ) | ( c & 0x03ff );
-            if( rtl::isUnicodeCodePoint(nSurrogate) && nSurrogate >= 0x00010000 )
+            if( rtl::isUnicodeScalarValue(nSurrogate) && nSurrogate >= 0x00010000 )
             {
                 sal_Int8 aBytes[] = { sal_Int8(0xF0 | ((nSurrogate >> 18) & 0x0F)),
                                       sal_Int8(0x80 | ((nSurrogate >> 12) & 0x3F)),
@@ -851,7 +851,7 @@ inline sal_Int32 calcXMLByteLength( const OUString& rStr,
         {
             // 2. surrogate: write as UTF-8 (if range is OK
             nSurrogate = ( nSurrogate << 10 ) | ( c & 0x03ff );
-            if( rtl::isUnicodeCodePoint(nSurrogate) && nSurrogate >= 0x00010000 )
+            if( rtl::isUnicodeScalarValue(nSurrogate) && nSurrogate >= 0x00010000 )
                 nOutputLength += 4;
             nSurrogate = 0;
         }
diff --git a/stoc/source/uriproc/UriSchemeParser_vndDOTsunDOTstarDOTscript.cxx b/stoc/source/uriproc/UriSchemeParser_vndDOTsunDOTstarDOTscript.cxx
index c57670e27f20..6df3cd3e33ca 100644
--- a/stoc/source/uriproc/UriSchemeParser_vndDOTsunDOTstarDOTscript.cxx
+++ b/stoc/source/uriproc/UriSchemeParser_vndDOTsunDOTstarDOTscript.cxx
@@ -110,8 +110,8 @@ OUString parsePart(
                     }
                     encoded |= (n & 0x3F) << shift;
                 }
-                if (!utf8 || !rtl::isUnicodeCodePoint(encoded) || encoded < min
-                    || (encoded >= 0xD800 && encoded <= 0xDFFF))
+                if (!utf8 || !rtl::isUnicodeScalarValue(encoded)
+                    || encoded < min)
                 {
                     break;
                 }
diff --git a/svtools/source/svrtf/svparser.cxx b/svtools/source/svrtf/svparser.cxx
index 541aa5276c2d..2d1be0e3e405 100644
--- a/svtools/source/svrtf/svparser.cxx
+++ b/svtools/source/svrtf/svparser.cxx
@@ -423,7 +423,7 @@ sal_uInt32 SvParser<T>::GetNextChar()
         while( 0 == nChars  && !bErr );
     }
 
-    if ( ! rtl::isUnicodeCodePoint( c ) )
+    if ( ! rtl::isUnicodeScalarValue( c ) )
         c = '?' ;
 
     if( bErr )
diff --git a/tools/source/fsys/urlobj.cxx b/tools/source/fsys/urlobj.cxx
index eeadb7df38d2..ccebbd3f4e2a 100644
--- a/tools/source/fsys/urlobj.cxx
+++ b/tools/source/fsys/urlobj.cxx
@@ -4745,10 +4745,8 @@ sal_uInt32 INetURLObject::getUTF32(sal_Unicode const *& rBegin,
                                         break;
                                     nShift -= 6;
                                 }
-                                if (bUTF8 && rtl::isUnicodeCodePoint(nEncoded)
-                                    && nEncoded >= nMin
-                                    && !rtl::isHighSurrogate(nEncoded)
-                                    && !rtl::isLowSurrogate(nEncoded))
+                                if (bUTF8 && rtl::isUnicodeScalarValue(nEncoded)
+                                    && nEncoded >= nMin)
                                 {
                                     rBegin = p;
                                     nUTF32 = nEncoded;