[Libreoffice-commits] core.git: i18nutil/source include/rtl sal/rtl sal/textenc sax/source sc/source stoc/source svtools/source tools/source xmlreader/source

Mon Feb 15 11:37:32 UTC 2016

i18nutil/source/utility/unicode.cxx                               |    4 
 include/rtl/character.hxx                                         |   60 +++++-----
 sal/rtl/uri.cxx                                                   |    8 -
 sal/rtl/ustrbuf.cxx                                               |    4 
 sal/rtl/ustring.cxx                                               |    2 
 sal/textenc/tcvtutf8.cxx                                          |    2 
 sal/textenc/unichars.hxx                                          |    9 +
 sax/source/expatwrap/saxwriter.cxx                                |    5 
 sc/source/core/tool/interpr1.cxx                                  |    2 
 stoc/source/uriproc/UriSchemeParser_vndDOTsunDOTstarDOTscript.cxx |    6 -
 svtools/source/svhtml/parhtml.cxx                                 |    2 
 svtools/source/svrtf/svparser.cxx                                 |    2 
 tools/source/fsys/urlobj.cxx                                      |    4 
 tools/source/inet/inetmime.cxx                                    |    6 -
 xmlreader/source/xmlreader.cxx                                    |    9 -
 15 files changed, 65 insertions(+), 60 deletions(-)

New commits:
commit 3210bc85ae1276350f18f4795efefe491c2206c2
Author: Stephan Bergmann <sbergman at redhat.com>
Date:   Mon Feb 15 12:36:11 2016 +0100

    Rename rtl::isValidCodePoint -> rtl::isUnicodeCodePoint
    
    ...and fix its documentation, and use it throughout the code base.
    
    Change-Id: I349bc2009b1b0aa7115ea90bc6ecd0a812f63698

diff --git a/i18nutil/source/utility/unicode.cxx b/i18nutil/source/utility/unicode.cxx
index a7d3d46..6507479 100644
--- a/i18nutil/source/utility/unicode.cxx
+++ b/i18nutil/source/utility/unicode.cxx
@@ -1190,14 +1190,14 @@ OUString ToggleUnicodeCodepoint::StringToReplace()
     {
         nUnicode = sIn.copy(0, nUPlus).toString().toUInt32(16);
         //prevent creating control characters or invalid Unicode values
-        if( nUnicode < 0x20 || nUnicode > 0x10ffff )
+        if( !rtl::isUnicodeCodePoint(nUnicode) || nUnicode < 0x20  )
             maInput = sIn.copy(nUPlus);
         sIn = sIn.copy(nUPlus+2);
         nUPlus =  sIn.indexOf("U+");
     }
 
     nUnicode = sIn.toString().toUInt32(16);
-    if( nUnicode < 0x20 || nUnicode > 0x10ffff )
+    if( !rtl::isUnicodeCodePoint(nUnicode) || nUnicode < 0x20 )
        maInput.truncate().append( sIn[sIn.getLength()-1] );
     return maInput.toString();
 }
diff --git a/include/rtl/character.hxx b/include/rtl/character.hxx
index 49f6803..ba3088e 100644
--- a/include/rtl/character.hxx
+++ b/include/rtl/character.hxx
@@ -29,6 +29,19 @@
 namespace rtl
 {
 
+/** Check for Unicode code point.
+
+    @param code  An integer.
+
+    @return  True if code is a Unicode code point, i.e. code <= 0x10FFFF (surrogates included).
+
+    @since LibreOffice 5.2
+*/
+inline bool isUnicodeCodePoint(sal_uInt32 code)
+{
+    return code <= 0x10FFFF;
+}
+
 /** Check for ASCII character.
 
     @param code  A Unicode code point.
@@ -39,7 +52,7 @@ namespace rtl
  */
 inline bool isAscii(sal_uInt32 code)
 {
-    assert(code <= 0x10FFFF);
+    assert(isUnicodeCodePoint(code));
     return code <= 0x7F;
 }
 
@@ -54,7 +67,7 @@ inline bool isAscii(sal_uInt32 code)
  */
 inline bool isAsciiLowerCase(sal_uInt32 code)
 {
-    assert(code <= 0x10FFFF);
+    assert(isUnicodeCodePoint(code));
     return code >= 'a' && code <= 'z';
 }
 
@@ -69,7 +82,7 @@ inline bool isAsciiLowerCase(sal_uInt32 code)
  */
 inline bool isAsciiUpperCase(sal_uInt32 code)
 {
-    assert(code <= 0x10FFFF);
+    assert(isUnicodeCodePoint(code));
     return code >= 'A' && code <= 'Z';
 }
 
@@ -84,7 +97,7 @@ inline bool isAsciiUpperCase(sal_uInt32 code)
  */
 inline bool isAsciiAlpha(sal_uInt32 code)
 {
-    assert(code <= 0x10FFFF);
+    assert(isUnicodeCodePoint(code));
     return isAsciiLowerCase(code) || isAsciiUpperCase(code);
 }
 
@@ -99,7 +112,7 @@ inline bool isAsciiAlpha(sal_uInt32 code)
  */
 inline bool isAsciiDigit(sal_uInt32 code)
 {
-    assert(code <= 0x10FFFF);
+    assert(isUnicodeCodePoint(code));
     return code >= '0' && code <= '9';
 }
 
@@ -114,7 +127,7 @@ inline bool isAsciiDigit(sal_uInt32 code)
  */
 inline bool isAsciiAlphanumeric(sal_uInt32 code)
 {
-    assert(code <= 0x10FFFF);
+    assert(isUnicodeCodePoint(code));
     return isAsciiDigit(code) || isAsciiAlpha(code);
 }
 
@@ -129,7 +142,7 @@ inline bool isAsciiAlphanumeric(sal_uInt32 code)
  */
 inline bool isAsciiCanonicHexDigit(sal_uInt32 code)
 {
-    assert(code <= 0x10FFFF);
+    assert(isUnicodeCodePoint(code));
     return isAsciiDigit(code) || (code >= 'A' && code <= 'F');
 }
 
@@ -144,7 +157,7 @@ inline bool isAsciiCanonicHexDigit(sal_uInt32 code)
  */
 inline bool isAsciiHexDigit(sal_uInt32 code)
 {
-    assert(code <= 0x10FFFF);
+    assert(isUnicodeCodePoint(code));
     return isAsciiCanonicHexDigit(code) || (code >= 'a' && code <= 'f');
 }
 
@@ -158,7 +171,7 @@ inline bool isAsciiHexDigit(sal_uInt32 code)
  */
 inline bool isAsciiOctalDigit(sal_uInt32 code)
 {
-    assert(code <= 0x10FFFF);
+    assert(isUnicodeCodePoint(code));
     return code >= '0' && code <= '7';
 }
 
@@ -173,7 +186,7 @@ inline bool isAsciiOctalDigit(sal_uInt32 code)
 */
 inline sal_uInt32 toAsciiUpperCase(sal_uInt32 code)
 {
-    assert(code <= 0x10FFFF);
+    assert(isUnicodeCodePoint(code));
     return isAsciiLowerCase(code) ? code - 32 : code;
 }
 
@@ -187,7 +200,7 @@ inline sal_uInt32 toAsciiUpperCase(sal_uInt32 code)
 */
 inline sal_uInt32 toAsciiLowerCase(sal_uInt32 code)
 {
-    assert(code <= 0x10FFFF);
+    assert(isUnicodeCodePoint(code));
     return isAsciiUpperCase(code) ? code + 32 : code;
 }
 
@@ -205,8 +218,8 @@ inline sal_uInt32 toAsciiLowerCase(sal_uInt32 code)
  */
 inline sal_Int32 compareIgnoreAsciiCase(sal_uInt32 code1, sal_uInt32 code2)
 {
-    assert(code1 <= 0x10FFFF);
-    assert(code2 <= 0x10FFFF);
+    assert(isUnicodeCodePoint(code1));
+    assert(isUnicodeCodePoint(code2));
     return static_cast<sal_Int32>(toAsciiLowerCase(code1))
         - static_cast<sal_Int32>(toAsciiLowerCase(code2));
 }
@@ -222,19 +235,6 @@ sal_uInt32 const surrogatesLowLast = 0xDFFF;
 }
 /// @endcond
 
-/** Check if a codepoint is accessible via utf16 per RFC3629
-
-    @param code  A non-BMP Unicode code point.
-
-    @return  True if the code is a valid codepoint.
-
-    @since LibreOffice 5.2
-*/
-inline bool isValidCodePoint( sal_uInt32 code)
-{
-    return code <= 0x10FFFF;
-}
-
 /** Check for high surrogate.
 
     @param code  A Unicode code point.
@@ -244,7 +244,7 @@ inline bool isValidCodePoint( sal_uInt32 code)
     @since LibreOffice 5.0
 */
 inline bool isHighSurrogate(sal_uInt32 code) {
-    assert(code <= 0x10FFFF);
+    assert(isUnicodeCodePoint(code));
     return code >= detail::surrogatesHighFirst
         && code <= detail::surrogatesHighLast;
 }
@@ -258,7 +258,7 @@ inline bool isHighSurrogate(sal_uInt32 code) {
     @since LibreOffice 5.0
 */
 inline bool isLowSurrogate(sal_uInt32 code) {
-    assert(code <= 0x10FFFF);
+    assert(isUnicodeCodePoint(code));
     return code >= detail::surrogatesLowFirst
         && code <= detail::surrogatesLowLast;
 }
@@ -272,7 +272,7 @@ inline bool isLowSurrogate(sal_uInt32 code) {
     @since LibreOffice 5.0
  */
 inline sal_Unicode getHighSurrogate(sal_uInt32 code) {
-    assert(code <= 0x10FFFF);
+    assert(isUnicodeCodePoint(code));
     assert(code >= 0x10000);
     return static_cast<sal_Unicode>(((code - 0x10000) >> 10) | detail::surrogatesHighFirst);
 }
@@ -286,7 +286,7 @@ inline sal_Unicode getHighSurrogate(sal_uInt32 code) {
     @since LibreOffice 5.0
  */
 inline sal_Unicode getLowSurrogate(sal_uInt32 code) {
-    assert(code <= 0x10FFFF);
+    assert(isUnicodeCodePoint(code));
     assert(code >= 0x10000);
     return static_cast<sal_Unicode>(((code - 0x10000) & 0x3FF) | detail::surrogatesLowFirst);
 }
diff --git a/sal/rtl/uri.cxx b/sal/rtl/uri.cxx
index b028b3c..0f3d6df 100644
--- a/sal/rtl/uri.cxx
+++ b/sal/rtl/uri.cxx
@@ -132,8 +132,8 @@ sal_uInt32 readUcs4(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
                     p += 3;
                     nEncoded |= ((nWeight1 & 3) << 4 | nWeight2) << nShift;
                 }
-                if (bUTF8 && nEncoded >= nMin && nEncoded <= 0x10FFFF
-                    && !rtl::isHighSurrogate(nEncoded)
+                if (bUTF8 && rtl::isUnicodeCodePoint(nEncoded)
+                    && nEncoded >= nMin && !rtl::isHighSurrogate(nEncoded)
                     && !rtl::isLowSurrogate(nEncoded))
                 {
                     *pBegin = p;
@@ -213,7 +213,7 @@ sal_uInt32 readUcs4(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
 
 void writeUcs4(rtl_uString ** pBuffer, sal_Int32 * pCapacity, sal_uInt32 nUtf32)
 {
-    assert(nUtf32 <= 0x10FFFF); // bad UTF-32 char
+    assert(rtl::isUnicodeCodePoint(nUtf32));
     if (nUtf32 <= 0xFFFF) {
         writeUnicode(
             pBuffer, pCapacity, static_cast< sal_Unicode >(nUtf32));
@@ -245,7 +245,7 @@ void writeEscapeOctet(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
 bool writeEscapeChar(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
                      sal_uInt32 nUtf32, rtl_TextEncoding eCharset, bool bStrict)
 {
-    assert(nUtf32 <= 0x10FFFF); // bad UTF-32 char
+    assert(rtl::isUnicodeCodePoint(nUtf32));
     if (eCharset == RTL_TEXTENCODING_UTF8) {
         if (nUtf32 < 0x80)
             writeEscapeOctet(pBuffer, pCapacity, nUtf32);
diff --git a/sal/rtl/ustrbuf.cxx b/sal/rtl/ustrbuf.cxx
index 89b897f..b73318b 100644
--- a/sal/rtl/ustrbuf.cxx
+++ b/sal/rtl/ustrbuf.cxx
@@ -21,7 +21,7 @@
 
 #include <osl/interlck.h>
 #include <osl/diagnose.h>
-
+#include <rtl/character.hxx>
 #include <rtl/ustrbuf.hxx>
 #include <strimp.hxx>
 
@@ -169,7 +169,7 @@ void rtl_uStringbuffer_insertUtf32(
 {
     sal_Unicode buf[2];
     sal_Int32 len;
-    OSL_ASSERT(c <= 0x10FFFF && !(c >= 0xD800 && c <= 0xDFFF));
+    OSL_ASSERT(rtl::isUnicodeCodePoint(c) && !(c >= 0xD800 && c <= 0xDFFF));
     if (c <= 0xFFFF) {
         buf[0] = (sal_Unicode) c;
         len = 1;
diff --git a/sal/rtl/ustring.cxx b/sal/rtl/ustring.cxx
index 16e9b87..db07cab 100644
--- a/sal/rtl/ustring.cxx
+++ b/sal/rtl/ustring.cxx
@@ -578,7 +578,7 @@ void SAL_CALL rtl_uString_newFromCodePoints(
     }
     n = codePointCount;
     for (i = 0; i < codePointCount; ++i) {
-        OSL_ASSERT(codePoints[i] <= 0x10FFFF);
+        OSL_ASSERT(rtl::isUnicodeCodePoint(codePoints[i]));
         if (codePoints[i] >= 0x10000) {
             ++n;
         }
diff --git a/sal/textenc/tcvtutf8.cxx b/sal/textenc/tcvtutf8.cxx
index 1f0b2bf..f9c9879 100644
--- a/sal/textenc/tcvtutf8.cxx
+++ b/sal/textenc/tcvtutf8.cxx
@@ -163,7 +163,7 @@ sal_Size ImplConvertUtf8ToUnicode(
                     *pDestBufPtr++ = (sal_Unicode) nUtf32;
                 else
                     goto no_output;
-            else if (nUtf32 <= 0x10FFFF)
+            else if (rtl::isUnicodeCodePoint(nUtf32))
                 if (pDestBufEnd - pDestBufPtr >= 2)
                 {
                     *pDestBufPtr++ = (sal_Unicode) ImplGetHighSurrogate(nUtf32);
diff --git a/sal/textenc/unichars.hxx b/sal/textenc/unichars.hxx
index 09652b9..0bcd6f7 100644
--- a/sal/textenc/unichars.hxx
+++ b/sal/textenc/unichars.hxx
@@ -20,9 +20,12 @@
 #ifndef INCLUDED_SAL_TEXTENC_UNICHARS_HXX
 #define INCLUDED_SAL_TEXTENC_UNICHARS_HXX
 
-#include "sal/config.h"
+#include <sal/config.h>
+
 #include <cassert>
-#include "sal/types.h"
+
+#include <rtl/character.hxx>
+#include <sal/types.h>
 
 #define RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER 0xFFFD
 
@@ -30,7 +33,7 @@ inline bool ImplIsNoncharacter(sal_uInt32 nUtf32)
 {
     return (nUtf32 >= 0xFDD0 && nUtf32 <= 0xFDEF)
            || (nUtf32 & 0xFFFF) >= 0xFFFE
-           || nUtf32 > 0x10FFFF;
+           || !rtl::isUnicodeCodePoint(nUtf32);
 }
     // All code points that are noncharacters, as of Unicode 3.1.1.
 
diff --git a/sax/source/expatwrap/saxwriter.cxx b/sax/source/expatwrap/saxwriter.cxx
index 09a7855..db75efe 100644
--- a/sax/source/expatwrap/saxwriter.cxx
+++ b/sax/source/expatwrap/saxwriter.cxx
@@ -39,6 +39,7 @@
 #include <cppuhelper/supportsservice.hxx>
 
 #include <osl/diagnose.h>
+#include <rtl/character.hxx>
 #include <rtl/ref.hxx>
 #include <rtl/ustrbuf.hxx>
 
@@ -388,7 +389,7 @@ inline bool SaxWriterHelper::convertToXML( const sal_Unicode * pStr,
             OSL_ENSURE( nSurrogate != 0, "lone 2nd Unicode surrogate" );
 
             nSurrogate = ( nSurrogate << 10 ) | ( c & 0x03ff );
-            if( nSurrogate >= 0x00010000  &&  nSurrogate <= 0x0010FFFF )
+            if( rtl::isUnicodeCodePoint(nSurrogate) && nSurrogate >= 0x00010000 )
             {
                 sal_Int8 aBytes[] = { sal_Int8(0xF0 | ((nSurrogate >> 18) & 0x0F)),
                                       sal_Int8(0x80 | ((nSurrogate >> 12) & 0x3F)),
@@ -831,7 +832,7 @@ inline sal_Int32 calcXMLByteLength( const sal_Unicode *pStr, sal_Int32 nStrLen,
         {
             // 2. surrogate: write as UTF-8 (if range is OK
             nSurrogate = ( nSurrogate << 10 ) | ( c & 0x03ff );
-            if( nSurrogate >= 0x00010000  &&  nSurrogate <= 0x0010FFFF )
+            if( rtl::isUnicodeCodePoint(nSurrogate) && nSurrogate >= 0x00010000 )
                 nOutputLength += 4;
             nSurrogate = 0;
         }
diff --git a/sc/source/core/tool/interpr1.cxx b/sc/source/core/tool/interpr1.cxx
index 266b0d7..d43f5f5 100644
--- a/sc/source/core/tool/interpr1.cxx
+++ b/sc/source/core/tool/interpr1.cxx
@@ -3324,7 +3324,7 @@ void ScInterpreter::ScUnichar()
     if ( MustHaveParamCount( GetByte(), 1 ) )
     {
         double dVal = ::rtl::math::approxFloor( GetDouble() );
-        if ((dVal < 0x000000) || (dVal > 0x10FFFF))
+        if (dVal < 0 || !rtl::isUnicodeCodePoint(dVal))
             PushIllegalArgument();
         else
         {
diff --git a/stoc/source/uriproc/UriSchemeParser_vndDOTsunDOTstarDOTscript.cxx b/stoc/source/uriproc/UriSchemeParser_vndDOTsunDOTstarDOTscript.cxx
index b2f6c6d..eaf8741 100644
--- a/stoc/source/uriproc/UriSchemeParser_vndDOTsunDOTstarDOTscript.cxx
+++ b/stoc/source/uriproc/UriSchemeParser_vndDOTsunDOTstarDOTscript.cxx
@@ -34,6 +34,7 @@
 #include <cppuhelper/supportsservice.hxx>
 #include <cppuhelper/weak.hxx>
 #include <osl/mutex.hxx>
+#include <rtl/character.hxx>
 #include <rtl/uri.hxx>
 #include <rtl/ustrbuf.hxx>
 #include <rtl/ustring.hxx>
@@ -110,9 +111,8 @@ OUString parsePart(
                     }
                     encoded |= (n & 0x3F) << shift;
                 }
-                if (!utf8 || encoded < min
-                    || (encoded >= 0xD800 && encoded <= 0xDFFF)
-                    || encoded > 0x10FFFF)
+                if (!utf8 || !rtl::isUnicodeCodePoint(encoded) || encoded < min
+                    || (encoded >= 0xD800 && encoded <= 0xDFFF))
                 {
                     break;
                 }
diff --git a/svtools/source/svhtml/parhtml.cxx b/svtools/source/svhtml/parhtml.cxx
index a8eff6d..d1a081b 100644
--- a/svtools/source/svhtml/parhtml.cxx
+++ b/svtools/source/svhtml/parhtml.cxx
@@ -502,7 +502,7 @@ int HTMLParser::ScanText( const sal_Unicode cBreak )
                     else
                         nNextCh = 0U;
 
-                    if ( ! rtl::isValidCodePoint( cChar ) )
+                    if ( ! rtl::isUnicodeCodePoint( cChar ) )
                         cChar = '?';
                 }
                 else if( HTML_ISALPHA( nNextCh ) )
diff --git a/svtools/source/svrtf/svparser.cxx b/svtools/source/svrtf/svparser.cxx
index b862e66..3350492 100644
--- a/svtools/source/svrtf/svparser.cxx
+++ b/svtools/source/svrtf/svparser.cxx
@@ -394,7 +394,7 @@ sal_uInt32 SvParser::GetNextChar()
         while( 0 == nChars  && !bErr );
     }
 
-    if ( ! rtl::isValidCodePoint( c ) )
+    if ( ! rtl::isUnicodeCodePoint( c ) )
         c = (sal_uInt32) '?' ;
 
     if( bErr )
diff --git a/tools/source/fsys/urlobj.cxx b/tools/source/fsys/urlobj.cxx
index 6ebb517..03e550e 100644
--- a/tools/source/fsys/urlobj.cxx
+++ b/tools/source/fsys/urlobj.cxx
@@ -4744,8 +4744,8 @@ sal_uInt32 INetURLObject::getUTF32(sal_Unicode const *& rBegin,
                                         break;
                                     nShift -= 6;
                                 }
-                                if (bUTF8 && nEncoded >= nMin
-                                    && nEncoded <= 0x10FFFF
+                                if (bUTF8 && rtl::isUnicodeCodePoint(nEncoded)
+                                    && nEncoded >= nMin
                                     && !rtl::isHighSurrogate(nEncoded)
                                     && !rtl::isLowSurrogate(nEncoded))
                                 {
diff --git a/tools/source/inet/inetmime.cxx b/tools/source/inet/inetmime.cxx
index 88b9f99..d0b638a 100644
--- a/tools/source/inet/inetmime.cxx
+++ b/tools/source/inet/inetmime.cxx
@@ -270,7 +270,7 @@ sal_Char * convertFromUnicode(const sal_Unicode * pBegin,
 inline sal_Unicode * putUTF32Character(sal_Unicode * pBuffer,
                                                  sal_uInt32 nUTF32)
 {
-    DBG_ASSERT(nUTF32 <= 0x10FFFF, "putUTF32Character(): Bad char");
+    DBG_ASSERT(rtl::isUnicodeCodePoint(nUTF32), "putUTF32Character(): Bad char");
     if (nUTF32 < 0x10000)
         *pBuffer++ = sal_Unicode(nUTF32);
     else
@@ -375,7 +375,7 @@ bool translateUTF8Char(const sal_Char *& rBegin,
         else
             return false;
 
-    if (nUCS4 < nMin || nUCS4 > 0x10FFFF)
+    if (!rtl::isUnicodeCodePoint(nUCS4) || nUCS4 < nMin)
         return false;
 
     if (eEncoding >= RTL_TEXTENCODING_UCS4)
@@ -1279,7 +1279,7 @@ void INetMIMEEncodedWordOutputSink::finish(bool bWriteTrailer)
                         if (bEscape)
                         {
                             DBG_ASSERT(
-                                nUTF32 < 0x10FFFF,
+                                rtl::isUnicodeCodePoint(nUTF32),
                                 "INetMIMEEncodedWordOutputSink::finish():"
                                     " Bad char");
                             if (nUTF32 < 0x80)
diff --git a/xmlreader/source/xmlreader.cxx b/xmlreader/source/xmlreader.cxx
index 011a094..25b5684 100644
--- a/xmlreader/source/xmlreader.cxx
+++ b/xmlreader/source/xmlreader.cxx
@@ -28,6 +28,7 @@
 #include <com/sun/star/uno/RuntimeException.hpp>
 #include <com/sun/star/uno/XInterface.hpp>
 #include <osl/file.h>
+#include <rtl/character.hxx>
 #include <rtl/string.h>
 #include <rtl/ustring.hxx>
 #include <sal/log.hxx>
@@ -399,7 +400,7 @@ char const * XmlReader::handleReference(char const * position, char const * end)
     ++position;
     if (*position == '#') {
         ++position;
-        sal_Int32 val = 0;
+        sal_uInt32 val = 0;
         char const * p;
         if (*position == 'x') {
             ++position;
@@ -415,7 +416,7 @@ char const * XmlReader::handleReference(char const * position, char const * end)
                 } else {
                     break;
                 }
-                if (val > 0x10FFFF) { // avoid overflow
+                if (!rtl::isUnicodeCodePoint(val)) { // avoid overflow
                     throw css::uno::RuntimeException(
                         "'&#x...' too large in " + fileUrl_ );
                 }
@@ -429,7 +430,7 @@ char const * XmlReader::handleReference(char const * position, char const * end)
                 } else {
                     break;
                 }
-                if (val > 0x10FFFF) { // avoid overflow
+                if (!rtl::isUnicodeCodePoint(val)) { // avoid overflow
                     throw css::uno::RuntimeException(
                         "'&#...' too large in " + fileUrl_ );
                 }
@@ -439,7 +440,7 @@ char const * XmlReader::handleReference(char const * position, char const * end)
             throw css::uno::RuntimeException(
                 "'&#...' missing ';' in " + fileUrl_ );
         }
-        assert(val >= 0 && val <= 0x10FFFF);
+        assert(rtl::isUnicodeCodePoint(val));
         if ((val < 0x20 && val != 0x9 && val != 0xA && val != 0xD) ||
             (val >= 0xD800 && val <= 0xDFFF) || val == 0xFFFE || val == 0xFFFF)
         {