[Libreoffice-commits] core.git: include/rtl sal/rtl svl/source sw/source tools/source

Stephan Bergmann sbergman at redhat.com
Mon Apr 20 04:54:56 PDT 2015


 include/rtl/character.hxx        |   84 +++++++++++++++++++++++++++++++++++++++
 include/rtl/surrogates.h         |   57 --------------------------
 sal/rtl/string.cxx               |    8 +--
 sal/rtl/uri.cxx                  |   18 ++++----
 sal/rtl/ustring.cxx              |   21 ++++-----
 svl/source/misc/urihelper.cxx    |    6 +-
 sw/source/filter/ww8/ww8par3.cxx |    9 ++--
 tools/source/fsys/urlobj.cxx     |    7 +--
 8 files changed, 118 insertions(+), 92 deletions(-)

New commits:
commit 9561c2f6793bede6e5092c36a4f1c8dbb782c4f4
Author: Stephan Bergmann <sbergman at redhat.com>
Date:   Mon Apr 20 13:50:52 2015 +0200

    Clean up new rtl/surrogates.h
    
    Change-Id: Iec781bdbbf216cb14c9ba5be5955123273d7699c

diff --git a/include/rtl/character.hxx b/include/rtl/character.hxx
index f5c9490..52151e8 100644
--- a/include/rtl/character.hxx
+++ b/include/rtl/character.hxx
@@ -211,6 +211,90 @@ inline sal_Int32 compareIgnoreAsciiCase(sal_uInt32 code1, sal_uInt32 code2)
         - static_cast<sal_Int32>(toAsciiLowerCase(code2));
 }
 
+/// @cond INTERNAL
+namespace detail {
+
+sal_uInt32 const surrogatesHighFirst = 0xD800;
+sal_uInt32 const surrogatesHighLast = 0xDBFF;
+sal_uInt32 const surrogatesLowFirst = 0xDC00;
+sal_uInt32 const surrogatesLowLast = 0xDFFF;
+
+}
+/// @endcond
+
+/** Check for high surrogate.
+
+    @param code  A Unicode code point.
+
+    @return  True if code is a high surrogate code point (0xD800--0xDBFF).
+
+    @since LibreOffice 5.0
+*/
+inline bool isHighSurrogate(sal_uInt32 code) {
+    assert(code <= 0x10FFFF);
+    return code >= detail::surrogatesHighFirst
+        && code <= detail::surrogatesHighLast;
+}
+
+/** Check for low surrogate.
+
+    @param code  A Unicode code point.
+
+    @return  True if code is a low surrogate code point (0xDC00--0xDFFF).
+
+    @since LibreOffice 5.0
+*/
+inline bool isLowSurrogate(sal_uInt32 code) {
+    assert(code <= 0x10FFFF);
+    return code >= detail::surrogatesLowFirst
+        && code <= detail::surrogatesLowLast;
+}
+
+/** Get high surrogate half of a non-BMP Unicode code point.
+
+    @param code  A non-BMP Unicode code point.
+
+    @return  The UTF-16 high surrogate half for the given code point.
+
+    @since LibreOffice 5.0
+ */
+inline sal_Unicode getHighSurrogate(sal_uInt32 code) {
+    assert(code <= 0x10FFFF);
+    assert(code >= 0x10000);
+    return ((code - 0x10000) >> 10) | detail::surrogatesHighFirst;
+}
+
+/** Get low surrogate half of a non-BMP Unicode code point.
+
+    @param code  A non-BMP Unicode code point.
+
+    @return  The UTF-16 low surrogate half for the given code point.
+
+    @since LibreOffice 5.0
+ */
+inline sal_Unicode getLowSurrogate(sal_uInt32 code) {
+    assert(code <= 0x10FFFF);
+    assert(code >= 0x10000);
+    return ((code - 0x10000) & 0x3FF) | detail::surrogatesLowFirst;
+}
+
+/** Combine surrogates to form a code point.
+
+    @param high  A high surrogate code point.
+
+    @param low  A low surrogate code point.
+
+    @return  The code point represented by the surrogate pair.
+
+    @since LibreOffice 5.0
+*/
+inline sal_uInt32 combineSurrogates(sal_uInt32 high, sal_uInt32 low) {
+    assert(isHighSurrogate(high));
+    assert(isLowSurrogate(low));
+    return ((high - detail::surrogatesHighFirst) << 10)
+        + (low - detail::surrogatesLowFirst) + 0x10000;
+}
+
 }
 
 #endif
diff --git a/include/rtl/surrogates.h b/include/rtl/surrogates.h
deleted file mode 100644
index ab98cd6..0000000
--- a/include/rtl/surrogates.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
-/*
- * This file is part of the LibreOffice project.
- *
- * This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/.
- *
- * This file incorporates work covered by the following license notice:
- *
- *   Licensed to the Apache Software Foundation (ASF) under one or more
- *   contributor license agreements. See the NOTICE file distributed
- *   with this work for additional information regarding copyright
- *   ownership. The ASF licenses this file to you under the Apache
- *   License, Version 2.0 (the "License"); you may not use this file
- *   except in compliance with the License. You may obtain a copy of
- *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
- */
-
-#ifndef INCLUDED_RTL_SURROGATES_H
-#define INCLUDED_RTL_SURROGATES_H
-
-#include <sal/config.h>
-
-#include <sal/types.h>
-
-#define SAL_RTL_FIRST_HIGH_SURROGATE 0xD800
-#define SAL_RTL_LAST_HIGH_SURROGATE 0xDBFF
-#define SAL_RTL_FIRST_LOW_SURROGATE 0xDC00
-#define SAL_RTL_LAST_LOW_SURROGATE 0xDFFF
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-inline bool isHighSurrogate(sal_uInt32 utf16) {
-    return utf16 >= SAL_RTL_FIRST_HIGH_SURROGATE
-        && utf16 <= SAL_RTL_LAST_HIGH_SURROGATE;
-}
-
-inline bool isLowSurrogate(sal_uInt32 utf16) {
-    return utf16 >= SAL_RTL_FIRST_LOW_SURROGATE
-        && utf16 <= SAL_RTL_LAST_LOW_SURROGATE;
-}
-
-inline sal_uInt32 combineSurrogates(sal_uInt32 high, sal_uInt32 low) {
-    return ((high - SAL_RTL_FIRST_HIGH_SURROGATE) << 10)
-        + (low - SAL_RTL_FIRST_LOW_SURROGATE) + 0x10000;
-}
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
-
-/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sal/rtl/string.cxx b/sal/rtl/string.cxx
index 68a85f0..3647908 100644
--- a/sal/rtl/string.cxx
+++ b/sal/rtl/string.cxx
@@ -32,7 +32,7 @@
 #include <rtl/tencinfo.h>
 
 #include "strimp.hxx"
-#include <rtl/surrogates.h>
+#include <rtl/character.hxx>
 #include <rtl/string.h>
 
 #include "rtl/math.h"
@@ -154,7 +154,7 @@ static int rtl_ImplGetFastUTF8ByteLen( const sal_Unicode* pStr, sal_Int32 nLen )
             n += 2;
         else
         {
-            if ( !isHighSurrogate(c) )
+            if ( !rtl::isHighSurrogate(c) )
                 n += 3;
             else
             {
@@ -163,9 +163,9 @@ static int rtl_ImplGetFastUTF8ByteLen( const sal_Unicode* pStr, sal_Int32 nLen )
                 if ( pStr+1 < pEndStr )
                 {
                     c = *(pStr+1);
-                    if ( isLowSurrogate(c) )
+                    if ( rtl::isLowSurrogate(c) )
                     {
-                        nUCS4Char = combineSurrogates(nUCS4Char, c);
+                        nUCS4Char = rtl::combineSurrogates(nUCS4Char, c);
                         pStr++;
                     }
                 }
diff --git a/sal/rtl/uri.cxx b/sal/rtl/uri.cxx
index a90b40b..ea895e5 100644
--- a/sal/rtl/uri.cxx
+++ b/sal/rtl/uri.cxx
@@ -20,7 +20,6 @@
 #include "osl/diagnose.h"
 #include "rtl/character.hxx"
 #include "rtl/strbuf.hxx"
-#include "rtl/surrogates.h"
 #include "rtl/textenc.h"
 #include "rtl/textcvt.h"
 #include "rtl/uri.h"
@@ -133,8 +132,9 @@ sal_uInt32 readUcs4(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
                     p += 3;
                     nEncoded |= ((nWeight1 & 3) << 4 | nWeight2) << nShift;
                 }
-                if (bUTF8 && nEncoded >= nMin && !isHighSurrogate(nEncoded)
-                    && !isLowSurrogate(nEncoded) && nEncoded <= 0x10FFFF)
+                if (bUTF8 && nEncoded >= nMin && nEncoded <= 0x10FFFF
+                    && !rtl::isHighSurrogate(nEncoded)
+                    && !rtl::isLowSurrogate(nEncoded))
                 {
                     *pBegin = p;
                     *pType = EscapeChar;
@@ -171,10 +171,10 @@ sal_uInt32 readUcs4(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
                     *pBegin = p;
                     *pType = EscapeChar;
                     assert( nDstSize == 1
-                        || (nDstSize == 2 && isHighSurrogate(aDst[0])
-                            && isLowSurrogate(aDst[1])));
+                        || (nDstSize == 2 && rtl::isHighSurrogate(aDst[0])
+                            && rtl::isLowSurrogate(aDst[1])));
                     return nDstSize == 1
-                        ? aDst[0] : combineSurrogates(aDst[0], aDst[1]);
+                        ? aDst[0] : rtl::combineSurrogates(aDst[0], aDst[1]);
                 }
                 else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
                          && pEnd - p >= 3 && p[0] == cEscapePrefix
@@ -205,9 +205,9 @@ sal_uInt32 readUcs4(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
     else
     {
         *pType = EscapeNo;
-        return isHighSurrogate(nChar) && *pBegin < pEnd
-               && isLowSurrogate(**pBegin) ?
-                   combineSurrogates(nChar, *(*pBegin)++) : nChar;
+        return rtl::isHighSurrogate(nChar) && *pBegin < pEnd
+               && rtl::isLowSurrogate(**pBegin) ?
+                   rtl::combineSurrogates(nChar, *(*pBegin)++) : nChar;
     }
 }
 
diff --git a/sal/rtl/ustring.cxx b/sal/rtl/ustring.cxx
index a418c6a..3c9c8b7 100644
--- a/sal/rtl/ustring.cxx
+++ b/sal/rtl/ustring.cxx
@@ -39,7 +39,7 @@
 
 #include "hash.hxx"
 #include "strimp.hxx"
-#include <rtl/surrogates.h>
+#include <rtl/character.hxx>
 #include <rtl/ustring.h>
 
 #include "rtl/math.h"
@@ -588,9 +588,8 @@ void SAL_CALL rtl_uString_newFromCodePoints(
         if (c < 0x10000) {
             *p++ = (sal_Unicode) c;
         } else {
-            c -= 0x10000;
-            *p++ = (sal_Unicode) ((c >> 10) | SAL_RTL_FIRST_HIGH_SURROGATE);
-            *p++ = (sal_Unicode) ((c & 0x3FF) | SAL_RTL_FIRST_LOW_SURROGATE);
+            *p++ = rtl::getHighSurrogate(c);
+            *p++ = rtl::getLowSurrogate(c);
         }
     }
     RTL_LOG_STRING_NEW( *newString );
@@ -1049,8 +1048,8 @@ sal_uInt32 SAL_CALL rtl_uString_iterateCodePoints(
     while (incrementCodePoints < 0) {
         assert(n > 0);
         cu = string->buffer[--n];
-        if (isLowSurrogate(cu) && n != 0 &&
-            isHighSurrogate(string->buffer[n - 1]))
+        if (rtl::isLowSurrogate(cu) && n != 0 &&
+            rtl::isHighSurrogate(string->buffer[n - 1]))
         {
             --n;
         }
@@ -1058,18 +1057,18 @@ sal_uInt32 SAL_CALL rtl_uString_iterateCodePoints(
     }
     assert(n >= 0 && n < string->length);
     cu = string->buffer[n];
-    if (isHighSurrogate(cu) && string->length - n >= 2 &&
-        isLowSurrogate(string->buffer[n + 1]))
+    if (rtl::isHighSurrogate(cu) && string->length - n >= 2 &&
+        rtl::isLowSurrogate(string->buffer[n + 1]))
     {
-        cp = combineSurrogates(cu, string->buffer[n + 1]);
+        cp = rtl::combineSurrogates(cu, string->buffer[n + 1]);
     } else {
         cp = cu;
     }
     while (incrementCodePoints > 0) {
         assert(n < string->length);
         cu = string->buffer[n++];
-        if (isHighSurrogate(cu) && n != string->length &&
-            isLowSurrogate(string->buffer[n]))
+        if (rtl::isHighSurrogate(cu) && n != string->length &&
+            rtl::isLowSurrogate(string->buffer[n]))
         {
             ++n;
         }
diff --git a/svl/source/misc/urihelper.cxx b/svl/source/misc/urihelper.cxx
index 7d7cc2c..06936b8 100644
--- a/svl/source/misc/urihelper.cxx
+++ b/svl/source/misc/urihelper.cxx
@@ -36,8 +36,8 @@
 #include <com/sun/star/uri/XUriReferenceFactory.hpp>
 #include <comphelper/processfactory.hxx>
 #include <osl/diagnose.h>
+#include <rtl/character.hxx>
 #include <rtl/instance.hxx>
-#include <rtl/surrogates.h>
 #include <rtl/ustrbuf.hxx>
 #include <rtl/ustring.h>
 #include <rtl/ustring.hxx>
@@ -281,9 +281,9 @@ namespace {
 
 inline sal_Int32 nextChar(OUString const & rStr, sal_Int32 nPos)
 {
-    return isHighSurrogate(rStr[nPos])
+    return rtl::isHighSurrogate(rStr[nPos])
            && rStr.getLength() - nPos >= 2
-           && isLowSurrogate(rStr[nPos + 1]) ?
+           && rtl::isLowSurrogate(rStr[nPos + 1]) ?
         nPos + 2 : nPos + 1;
 }
 
diff --git a/sw/source/filter/ww8/ww8par3.cxx b/sw/source/filter/ww8/ww8par3.cxx
index 5bb17c9..104052a 100644
--- a/sw/source/filter/ww8/ww8par3.cxx
+++ b/sw/source/filter/ww8/ww8par3.cxx
@@ -79,7 +79,7 @@
 
 #include <IMark.hxx>
 #include <unotools/fltrcfg.hxx>
-#include <rtl/surrogates.h>
+#include <rtl/character.hxx>
 #include <xmloff/odffields.hxx>
 
 #include <stdio.h>
@@ -500,16 +500,17 @@ OUString sanitizeString(const OUString& rString)
     while (i < rString.getLength())
     {
         sal_Unicode c = rString[i];
-        if (isHighSurrogate(c))
+        if (rtl::isHighSurrogate(c))
         {
-            if (i+1 == rString.getLength() || !isLowSurrogate(rString[i+1]))
+            if (i+1 == rString.getLength()
+                || !rtl::isLowSurrogate(rString[i+1]))
             {
                 SAL_WARN("sw.ww8", "Surrogate error: high without low");
                 return rString.copy(0, i);
             }
             ++i;    //skip correct low
         }
-        if (isLowSurrogate(c)) //bare low without preceeding high
+        if (rtl::isLowSurrogate(c)) //bare low without preceding high
         {
             SAL_WARN("sw.ww8", "Surrogate error: low without high");
             return rString.copy(0, i);
diff --git a/tools/source/fsys/urlobj.cxx b/tools/source/fsys/urlobj.cxx
index 4dfe588..08c0c6d 100644
--- a/tools/source/fsys/urlobj.cxx
+++ b/tools/source/fsys/urlobj.cxx
@@ -31,7 +31,6 @@
 #include <osl/file.hxx>
 #include <rtl/character.hxx>
 #include <rtl/string.h>
-#include <rtl/surrogates.h>
 #include <rtl/textenc.h>
 #include <rtl/ustring.hxx>
 #include <sal/types.h>
@@ -4778,9 +4777,9 @@ sal_uInt32 INetURLObject::getUTF32(sal_Unicode const *& rBegin,
                                     nShift -= 6;
                                 }
                                 if (bUTF8 && nEncoded >= nMin
-                                    && !isHighSurrogate(nEncoded)
-                                    && !isLowSurrogate(nEncoded)
-                                    && nEncoded <= 0x10FFFF)
+                                    && nEncoded <= 0x10FFFF
+                                    && !rtl::isHighSurrogate(nEncoded)
+                                    && !rtl::isLowSurrogate(nEncoded))
                                 {
                                     rBegin = p;
                                     nUTF32 = nEncoded;


More information about the Libreoffice-commits mailing list