[Libreoffice-commits] core.git: include/rtl sal/rtl svl/source sw/source tools/source
Stephan Bergmann
sbergman at redhat.com
Mon Apr 20 04:54:56 PDT 2015
include/rtl/character.hxx | 84 +++++++++++++++++++++++++++++++++++++++
include/rtl/surrogates.h | 57 --------------------------
sal/rtl/string.cxx | 8 +--
sal/rtl/uri.cxx | 18 ++++----
sal/rtl/ustring.cxx | 21 ++++-----
svl/source/misc/urihelper.cxx | 6 +-
sw/source/filter/ww8/ww8par3.cxx | 9 ++--
tools/source/fsys/urlobj.cxx | 7 +--
8 files changed, 118 insertions(+), 92 deletions(-)
New commits:
commit 9561c2f6793bede6e5092c36a4f1c8dbb782c4f4
Author: Stephan Bergmann <sbergman at redhat.com>
Date: Mon Apr 20 13:50:52 2015 +0200
Clean up new rtl/surrogates.h
Change-Id: Iec781bdbbf216cb14c9ba5be5955123273d7699c
diff --git a/include/rtl/character.hxx b/include/rtl/character.hxx
index f5c9490..52151e8 100644
--- a/include/rtl/character.hxx
+++ b/include/rtl/character.hxx
@@ -211,6 +211,90 @@ inline sal_Int32 compareIgnoreAsciiCase(sal_uInt32 code1, sal_uInt32 code2)
- static_cast<sal_Int32>(toAsciiLowerCase(code2));
}
+/// @cond INTERNAL
+namespace detail {
+
+sal_uInt32 const surrogatesHighFirst = 0xD800;
+sal_uInt32 const surrogatesHighLast = 0xDBFF;
+sal_uInt32 const surrogatesLowFirst = 0xDC00;
+sal_uInt32 const surrogatesLowLast = 0xDFFF;
+
+}
+/// @endcond
+
+/** Check for high surrogate.
+
+ @param code A Unicode code point.
+
+ @return True if code is a high surrogate code point (0xD800--0xDBFF).
+
+ @since LibreOffice 5.0
+*/
+inline bool isHighSurrogate(sal_uInt32 code) {
+ assert(code <= 0x10FFFF);
+ return code >= detail::surrogatesHighFirst
+ && code <= detail::surrogatesHighLast;
+}
+
+/** Check for low surrogate.
+
+ @param code A Unicode code point.
+
+ @return True if code is a low surrogate code point (0xDC00--0xDFFF).
+
+ @since LibreOffice 5.0
+*/
+inline bool isLowSurrogate(sal_uInt32 code) {
+ assert(code <= 0x10FFFF);
+ return code >= detail::surrogatesLowFirst
+ && code <= detail::surrogatesLowLast;
+}
+
+/** Get high surrogate half of a non-BMP Unicode code point.
+
+ @param code A non-BMP Unicode code point.
+
+ @return The UTF-16 high surrogate half for the given code point.
+
+ @since LibreOffice 5.0
+ */
+inline sal_Unicode getHighSurrogate(sal_uInt32 code) {
+ assert(code <= 0x10FFFF);
+ assert(code >= 0x10000);
+ return ((code - 0x10000) >> 10) | detail::surrogatesHighFirst;
+}
+
+/** Get low surrogate half of a non-BMP Unicode code point.
+
+ @param code A non-BMP Unicode code point.
+
+ @return The UTF-16 low surrogate half for the given code point.
+
+ @since LibreOffice 5.0
+ */
+inline sal_Unicode getLowSurrogate(sal_uInt32 code) {
+ assert(code <= 0x10FFFF);
+ assert(code >= 0x10000);
+ return ((code - 0x10000) & 0x3FF) | detail::surrogatesLowFirst;
+}
+
+/** Combine surrogates to form a code point.
+
+ @param high A high surrogate code point.
+
+ @param low A low surrogate code point.
+
+ @return The code point represented by the surrogate pair.
+
+ @since LibreOffice 5.0
+*/
+inline sal_uInt32 combineSurrogates(sal_uInt32 high, sal_uInt32 low) {
+ assert(isHighSurrogate(high));
+ assert(isLowSurrogate(low));
+ return ((high - detail::surrogatesHighFirst) << 10)
+ + (low - detail::surrogatesLowFirst) + 0x10000;
+}
+
}
#endif
diff --git a/include/rtl/surrogates.h b/include/rtl/surrogates.h
deleted file mode 100644
index ab98cd6..0000000
--- a/include/rtl/surrogates.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
-/*
- * This file is part of the LibreOffice project.
- *
- * This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/.
- *
- * This file incorporates work covered by the following license notice:
- *
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed
- * with this work for additional information regarding copyright
- * ownership. The ASF licenses this file to you under the Apache
- * License, Version 2.0 (the "License"); you may not use this file
- * except in compliance with the License. You may obtain a copy of
- * the License at http://www.apache.org/licenses/LICENSE-2.0 .
- */
-
-#ifndef INCLUDED_RTL_SURROGATES_H
-#define INCLUDED_RTL_SURROGATES_H
-
-#include <sal/config.h>
-
-#include <sal/types.h>
-
-#define SAL_RTL_FIRST_HIGH_SURROGATE 0xD800
-#define SAL_RTL_LAST_HIGH_SURROGATE 0xDBFF
-#define SAL_RTL_FIRST_LOW_SURROGATE 0xDC00
-#define SAL_RTL_LAST_LOW_SURROGATE 0xDFFF
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-inline bool isHighSurrogate(sal_uInt32 utf16) {
- return utf16 >= SAL_RTL_FIRST_HIGH_SURROGATE
- && utf16 <= SAL_RTL_LAST_HIGH_SURROGATE;
-}
-
-inline bool isLowSurrogate(sal_uInt32 utf16) {
- return utf16 >= SAL_RTL_FIRST_LOW_SURROGATE
- && utf16 <= SAL_RTL_LAST_LOW_SURROGATE;
-}
-
-inline sal_uInt32 combineSurrogates(sal_uInt32 high, sal_uInt32 low) {
- return ((high - SAL_RTL_FIRST_HIGH_SURROGATE) << 10)
- + (low - SAL_RTL_FIRST_LOW_SURROGATE) + 0x10000;
-}
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
-
-/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sal/rtl/string.cxx b/sal/rtl/string.cxx
index 68a85f0..3647908 100644
--- a/sal/rtl/string.cxx
+++ b/sal/rtl/string.cxx
@@ -32,7 +32,7 @@
#include <rtl/tencinfo.h>
#include "strimp.hxx"
-#include <rtl/surrogates.h>
+#include <rtl/character.hxx>
#include <rtl/string.h>
#include "rtl/math.h"
@@ -154,7 +154,7 @@ static int rtl_ImplGetFastUTF8ByteLen( const sal_Unicode* pStr, sal_Int32 nLen )
n += 2;
else
{
- if ( !isHighSurrogate(c) )
+ if ( !rtl::isHighSurrogate(c) )
n += 3;
else
{
@@ -163,9 +163,9 @@ static int rtl_ImplGetFastUTF8ByteLen( const sal_Unicode* pStr, sal_Int32 nLen )
if ( pStr+1 < pEndStr )
{
c = *(pStr+1);
- if ( isLowSurrogate(c) )
+ if ( rtl::isLowSurrogate(c) )
{
- nUCS4Char = combineSurrogates(nUCS4Char, c);
+ nUCS4Char = rtl::combineSurrogates(nUCS4Char, c);
pStr++;
}
}
diff --git a/sal/rtl/uri.cxx b/sal/rtl/uri.cxx
index a90b40b..ea895e5 100644
--- a/sal/rtl/uri.cxx
+++ b/sal/rtl/uri.cxx
@@ -20,7 +20,6 @@
#include "osl/diagnose.h"
#include "rtl/character.hxx"
#include "rtl/strbuf.hxx"
-#include "rtl/surrogates.h"
#include "rtl/textenc.h"
#include "rtl/textcvt.h"
#include "rtl/uri.h"
@@ -133,8 +132,9 @@ sal_uInt32 readUcs4(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
p += 3;
nEncoded |= ((nWeight1 & 3) << 4 | nWeight2) << nShift;
}
- if (bUTF8 && nEncoded >= nMin && !isHighSurrogate(nEncoded)
- && !isLowSurrogate(nEncoded) && nEncoded <= 0x10FFFF)
+ if (bUTF8 && nEncoded >= nMin && nEncoded <= 0x10FFFF
+ && !rtl::isHighSurrogate(nEncoded)
+ && !rtl::isLowSurrogate(nEncoded))
{
*pBegin = p;
*pType = EscapeChar;
@@ -171,10 +171,10 @@ sal_uInt32 readUcs4(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
*pBegin = p;
*pType = EscapeChar;
assert( nDstSize == 1
- || (nDstSize == 2 && isHighSurrogate(aDst[0])
- && isLowSurrogate(aDst[1])));
+ || (nDstSize == 2 && rtl::isHighSurrogate(aDst[0])
+ && rtl::isLowSurrogate(aDst[1])));
return nDstSize == 1
- ? aDst[0] : combineSurrogates(aDst[0], aDst[1]);
+ ? aDst[0] : rtl::combineSurrogates(aDst[0], aDst[1]);
}
else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
&& pEnd - p >= 3 && p[0] == cEscapePrefix
@@ -205,9 +205,9 @@ sal_uInt32 readUcs4(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
else
{
*pType = EscapeNo;
- return isHighSurrogate(nChar) && *pBegin < pEnd
- && isLowSurrogate(**pBegin) ?
- combineSurrogates(nChar, *(*pBegin)++) : nChar;
+ return rtl::isHighSurrogate(nChar) && *pBegin < pEnd
+ && rtl::isLowSurrogate(**pBegin) ?
+ rtl::combineSurrogates(nChar, *(*pBegin)++) : nChar;
}
}
diff --git a/sal/rtl/ustring.cxx b/sal/rtl/ustring.cxx
index a418c6a..3c9c8b7 100644
--- a/sal/rtl/ustring.cxx
+++ b/sal/rtl/ustring.cxx
@@ -39,7 +39,7 @@
#include "hash.hxx"
#include "strimp.hxx"
-#include <rtl/surrogates.h>
+#include <rtl/character.hxx>
#include <rtl/ustring.h>
#include "rtl/math.h"
@@ -588,9 +588,8 @@ void SAL_CALL rtl_uString_newFromCodePoints(
if (c < 0x10000) {
*p++ = (sal_Unicode) c;
} else {
- c -= 0x10000;
- *p++ = (sal_Unicode) ((c >> 10) | SAL_RTL_FIRST_HIGH_SURROGATE);
- *p++ = (sal_Unicode) ((c & 0x3FF) | SAL_RTL_FIRST_LOW_SURROGATE);
+ *p++ = rtl::getHighSurrogate(c);
+ *p++ = rtl::getLowSurrogate(c);
}
}
RTL_LOG_STRING_NEW( *newString );
@@ -1049,8 +1048,8 @@ sal_uInt32 SAL_CALL rtl_uString_iterateCodePoints(
while (incrementCodePoints < 0) {
assert(n > 0);
cu = string->buffer[--n];
- if (isLowSurrogate(cu) && n != 0 &&
- isHighSurrogate(string->buffer[n - 1]))
+ if (rtl::isLowSurrogate(cu) && n != 0 &&
+ rtl::isHighSurrogate(string->buffer[n - 1]))
{
--n;
}
@@ -1058,18 +1057,18 @@ sal_uInt32 SAL_CALL rtl_uString_iterateCodePoints(
}
assert(n >= 0 && n < string->length);
cu = string->buffer[n];
- if (isHighSurrogate(cu) && string->length - n >= 2 &&
- isLowSurrogate(string->buffer[n + 1]))
+ if (rtl::isHighSurrogate(cu) && string->length - n >= 2 &&
+ rtl::isLowSurrogate(string->buffer[n + 1]))
{
- cp = combineSurrogates(cu, string->buffer[n + 1]);
+ cp = rtl::combineSurrogates(cu, string->buffer[n + 1]);
} else {
cp = cu;
}
while (incrementCodePoints > 0) {
assert(n < string->length);
cu = string->buffer[n++];
- if (isHighSurrogate(cu) && n != string->length &&
- isLowSurrogate(string->buffer[n]))
+ if (rtl::isHighSurrogate(cu) && n != string->length &&
+ rtl::isLowSurrogate(string->buffer[n]))
{
++n;
}
diff --git a/svl/source/misc/urihelper.cxx b/svl/source/misc/urihelper.cxx
index 7d7cc2c..06936b8 100644
--- a/svl/source/misc/urihelper.cxx
+++ b/svl/source/misc/urihelper.cxx
@@ -36,8 +36,8 @@
#include <com/sun/star/uri/XUriReferenceFactory.hpp>
#include <comphelper/processfactory.hxx>
#include <osl/diagnose.h>
+#include <rtl/character.hxx>
#include <rtl/instance.hxx>
-#include <rtl/surrogates.h>
#include <rtl/ustrbuf.hxx>
#include <rtl/ustring.h>
#include <rtl/ustring.hxx>
@@ -281,9 +281,9 @@ namespace {
inline sal_Int32 nextChar(OUString const & rStr, sal_Int32 nPos)
{
- return isHighSurrogate(rStr[nPos])
+ return rtl::isHighSurrogate(rStr[nPos])
&& rStr.getLength() - nPos >= 2
- && isLowSurrogate(rStr[nPos + 1]) ?
+ && rtl::isLowSurrogate(rStr[nPos + 1]) ?
nPos + 2 : nPos + 1;
}
diff --git a/sw/source/filter/ww8/ww8par3.cxx b/sw/source/filter/ww8/ww8par3.cxx
index 5bb17c9..104052a 100644
--- a/sw/source/filter/ww8/ww8par3.cxx
+++ b/sw/source/filter/ww8/ww8par3.cxx
@@ -79,7 +79,7 @@
#include <IMark.hxx>
#include <unotools/fltrcfg.hxx>
-#include <rtl/surrogates.h>
+#include <rtl/character.hxx>
#include <xmloff/odffields.hxx>
#include <stdio.h>
@@ -500,16 +500,17 @@ OUString sanitizeString(const OUString& rString)
while (i < rString.getLength())
{
sal_Unicode c = rString[i];
- if (isHighSurrogate(c))
+ if (rtl::isHighSurrogate(c))
{
- if (i+1 == rString.getLength() || !isLowSurrogate(rString[i+1]))
+ if (i+1 == rString.getLength()
+ || !rtl::isLowSurrogate(rString[i+1]))
{
SAL_WARN("sw.ww8", "Surrogate error: high without low");
return rString.copy(0, i);
}
++i; //skip correct low
}
- if (isLowSurrogate(c)) //bare low without preceeding high
+ if (rtl::isLowSurrogate(c)) //bare low without preceding high
{
SAL_WARN("sw.ww8", "Surrogate error: low without high");
return rString.copy(0, i);
diff --git a/tools/source/fsys/urlobj.cxx b/tools/source/fsys/urlobj.cxx
index 4dfe588..08c0c6d 100644
--- a/tools/source/fsys/urlobj.cxx
+++ b/tools/source/fsys/urlobj.cxx
@@ -31,7 +31,6 @@
#include <osl/file.hxx>
#include <rtl/character.hxx>
#include <rtl/string.h>
-#include <rtl/surrogates.h>
#include <rtl/textenc.h>
#include <rtl/ustring.hxx>
#include <sal/types.h>
@@ -4778,9 +4777,9 @@ sal_uInt32 INetURLObject::getUTF32(sal_Unicode const *& rBegin,
nShift -= 6;
}
if (bUTF8 && nEncoded >= nMin
- && !isHighSurrogate(nEncoded)
- && !isLowSurrogate(nEncoded)
- && nEncoded <= 0x10FFFF)
+ && nEncoded <= 0x10FFFF
+ && !rtl::isHighSurrogate(nEncoded)
+ && !rtl::isLowSurrogate(nEncoded))
{
rBegin = p;
nUTF32 = nEncoded;
More information about the Libreoffice-commits
mailing list