[Libreoffice-commits] .: 2 commits - comphelper/inc comphelper/qa comphelper/source

Caolán McNamara caolan at kemper.freedesktop.org
Fri Feb 18 02:21:19 PST 2011


 comphelper/inc/comphelper/string.hxx |   21 +++++++
 comphelper/qa/string/test_string.cxx |   13 ++++
 comphelper/source/misc/string.cxx    |  102 ++++++++++++++++++++++++++++++++++-
 3 files changed, 134 insertions(+), 2 deletions(-)

New commits:
commit ac7f067881d2afcbd57ed70a52e6cd1d09398493
Author: Sébastien Le Ray <sebastien-libreoffice at orniz.org>
Date:   Thu Feb 17 10:43:08 2011 +0100

    Use decimalStringToNumber for natural comparison

diff --git a/comphelper/source/misc/string.cxx b/comphelper/source/misc/string.cxx
index 8231e32..9ebe88a 100644
--- a/comphelper/source/misc/string.cxx
+++ b/comphelper/source/misc/string.cxx
@@ -275,8 +275,8 @@ sal_Int32 compareNatural( const ::rtl::OUString & rLHS, const ::rtl::OUString &
         //numbers outside of the normal 0-9 range, e.g. see GetLocalizedChar in
         //vcl
 
-        sal_Int32 nLHS = rLHS.copy(nLHSFirstDigitPos, nLHSChunkLen).toInt32();
-        sal_Int32 nRHS = rRHS.copy(nRHSFirstDigitPos, nRHSChunkLen).toInt32();
+        sal_uInt32 nLHS = comphelper::string::decimalStringToNumber(rLHS.copy(nLHSFirstDigitPos, nLHSChunkLen));
+        sal_uInt32 nRHS = comphelper::string::decimalStringToNumber(rRHS.copy(nRHSFirstDigitPos, nRHSChunkLen));
 
         nRet = nLHS-nRHS;
         if (nRet != 0)
commit 4919987ff6a50313925341aad6141249739ea67e
Author: Sébastien Le Ray <sebastien-libreoffice at orniz.org>
Date:   Thu Feb 17 10:41:21 2011 +0100

    Added comphelper::string::decimalStringToNumber

diff --git a/comphelper/inc/comphelper/string.hxx b/comphelper/inc/comphelper/string.hxx
index 62f3d83..88f4936 100644
--- a/comphelper/inc/comphelper/string.hxx
+++ b/comphelper/inc/comphelper/string.hxx
@@ -122,6 +122,27 @@ COMPHELPER_DLLPUBLIC ::rtl::OUString&
 COMPHELPER_DLLPUBLIC ::rtl::OUString convertCommaSeparated(
     ::com::sun::star::uno::Sequence< ::rtl::OUString > const & i_rSeq);
 
+/** Convert a decimal string to a number.
+
+    The string must be base-10 and unsigned, but may contain any
+    code point listed in the "Number, Decimal Digit" Unicode
+    category.
+
+    No verification is made about the validity of the string;
+    passing a string that contains anything other than decimal
+    digit code points gives unspecified results.
+
+    If your string is guaranteed to contain only ASCII digits,
+    use rtl::OUString::toInt32 instead.
+
+    @param str  The string to convert, containing only decimal
+                digit code points.
+
+    @return     The value of the string as a sal_uInt32.
+ */
+COMPHELPER_DLLPUBLIC sal_uInt32 decimalStringToNumber(
+    ::rtl::OUString const & str );
+
 /** Convert a single comma separated string to a sequence of strings.
 
     Note that no escaping of commas or anything fancy is done.
diff --git a/comphelper/qa/string/test_string.cxx b/comphelper/qa/string/test_string.cxx
index aa733cf..727b76a 100644
--- a/comphelper/qa/string/test_string.cxx
+++ b/comphelper/qa/string/test_string.cxx
@@ -49,10 +49,12 @@ class TestString: public CppUnit::TestFixture
 public:
     void test();
     void testNatural();
+    void testDecimalStringToNumber();
 
     CPPUNIT_TEST_SUITE(TestString);
     CPPUNIT_TEST(test);
     CPPUNIT_TEST(testNatural);
+    CPPUNIT_TEST(testDecimalStringToNumber);
     CPPUNIT_TEST_SUITE_END();
 };
 
@@ -86,6 +88,17 @@ void TestString::test()
     CPPUNIT_ASSERT(n3 == -1);
 }
 
+void TestString::testDecimalStringToNumber() {   // Checks ASCII, N'Ko and non-BMP digit conversion.
+    rtl::OUString s1(RTL_CONSTASCII_USTRINGPARAM("1234"));
+    CPPUNIT_ASSERT_EQUAL((sal_uInt32)1234, comphelper::string::decimalStringToNumber(s1));
+    s1 += rtl::OUString(L'\u07C6');   // U+07C6 is in the N'Ko digit range (zero at U+07C0), i.e. digit 6
+    CPPUNIT_ASSERT_EQUAL((sal_uInt32)12346, comphelper::string::decimalStringToNumber(s1));
+    // Code points above U+FFFF (two 16-bit units each in UTF-16); despite its name the array holds sal_uInt32 code points
+    sal_uInt32 utf16String[] = { 0x1D7FE /* 8 */, 0x1D7F7 /* 1 */};
+    s1 = rtl::OUString(utf16String, 2);
+    CPPUNIT_ASSERT_EQUAL((sal_uInt32)81, comphelper::string::decimalStringToNumber(s1));
+}
+
 using namespace ::com::sun::star;
 
 class testCollator : public cppu::WeakImplHelper1< i18n::XCollator >
diff --git a/comphelper/source/misc/string.cxx b/comphelper/source/misc/string.cxx
index aa4efaa..8231e32 100644
--- a/comphelper/source/misc/string.cxx
+++ b/comphelper/source/misc/string.cxx
@@ -94,6 +94,103 @@ rtl::OUString searchAndReplaceAsciiL(
     return _source;
 }
 
+sal_uInt32 decimalStringToNumber(
+    ::rtl::OUString const & str )
+{   // Folds every code point of str into one base-10 value, mapping each script's digits onto 0-9.
+    sal_uInt32 result = 0;
+    for( sal_Int32 i = 0 ; i < str.getLength() ; )   // no increment: i is handed to iterateCodePoints by pointer, which is relied on to advance it
+    {
+        sal_uInt32 c = str.iterateCodePoints(&i);
+        sal_uInt8 value = 0;   // stays 0 when c is above U+0039 but below U+0660 (no branch matches)
+        if( c <= 0x0039)    // ASCII decimal digits, most common (no lower-bound check: anything <= U+0039 lands here)
+            value = c - 0x0030;
+        else if( c >= 0x1D7F6 )    // mathematical monospace digits (ranges are tested highest-first, lower bound only)
+            value = c - 0x1D7F6;
+        else if( c >= 0x1D7EC ) // mathematical sans-serif bold digits
+            value = c - 0x1D7EC;
+        else if( c >= 0x1D7E2 ) // mathematical sans-serif digits
+            value = c - 0x1D7E2;
+        else if( c >= 0x1D7D8 ) // mathematical double-struck digits
+            value = c - 0x1D7D8;
+        else if( c >= 0x1D7CE ) // mathematical bold digits
+            value = c - 0x1D7CE;
+        else if( c >= 0x11066 ) // brahmi digits
+            value = c - 0x11066;
+        else if( c >= 0x104A0 ) // osmanya digits
+            value = c - 0x104A0;
+        else if( c >= 0xFF10 ) // fullwidth digits
+            value = c - 0xFF10;
+        else if( c >= 0xABF0 ) // meetei mayek digits
+            value = c - 0xABF0;
+        else if( c >= 0xAA50 ) // cham digits
+            value = c - 0xAA50;
+        else if( c >= 0xA9D0 ) // javanese digits
+            value = c - 0xA9D0;
+        else if( c >= 0xA900 ) // kayah li digits
+            value = c - 0xA900;
+        else if( c >= 0xA8D0 ) // saurashtra digits
+            value = c - 0xA8D0;
+        else if( c >= 0xA620 ) // vai digits
+            value = c - 0xA620;
+        else if( c >= 0x1C50 ) // ol chiki digits
+            value = c - 0x1C50;
+        else if( c >= 0x1C40 ) // lepcha digits
+            value = c - 0x1C40;
+        else if( c >= 0x1BB0 ) // sundanese digits
+            value = c - 0x1BB0;
+        else if( c >= 0x1B50 ) // balinese digits
+            value = c - 0x1B50;
+        else if( c >= 0x1A90 ) // tai tham tham digits
+            value = c - 0x1A90;
+        else if( c >= 0x1A80 ) // tai tham hora digits
+            value = c - 0x1A80;
+        else if( c >= 0x19D0 ) // new tai lue digits
+            value = c - 0x19D0;
+        else if( c >= 0x1946 ) // limbu digits
+            value = c - 0x1946;
+        else if( c >= 0x1810 ) // mongolian digits
+            value = c - 0x1810;
+        else if( c >= 0x17E0 ) // khmer digits
+            value = c - 0x17E0;
+        else if( c >= 0x1090 ) // myanmar shan digits
+            value = c - 0x1090;
+        else if( c >= 0x1040 ) // myanmar digits
+            value = c - 0x1040;
+        else if( c >= 0x0F20 ) // tibetan digits
+            value = c - 0x0F20;
+        else if( c >= 0x0ED0 ) // lao digits
+            value = c - 0x0ED0;
+        else if( c >= 0x0E50 ) // thai digits
+            value = c - 0x0E50;
+        else if( c >= 0x0D66 ) // malayalam digits
+            value = c - 0x0D66;
+        else if( c >= 0x0CE6 ) // kannada digits
+            value = c - 0x0CE6;
+        else if( c >= 0x0C66 ) // telugu digits
+            value = c - 0x0C66;
+        else if( c >= 0x0BE6 ) // tamil digits
+            value = c - 0x0BE6;
+        else if( c >= 0x0B66 ) // oriya digits
+            value = c - 0x0B66;
+        else if( c >= 0x0AE6 ) // gujarati digits
+            value = c - 0x0AE6;
+        else if( c >= 0x0A66 ) // gurmukhi digits
+            value = c - 0x0A66;
+        else if( c >= 0x09E6 ) // bengali digits
+            value = c - 0x09E6;
+        else if( c >= 0x0966 ) // devanagari digits
+            value = c - 0x0966;
+        else if( c >= 0x07C0 ) // nko digits
+            value = c - 0x07C0;
+        else if( c >= 0x06F0 ) // extended arabic-indic digits
+            value = c - 0x06F0;
+        else if( c >= 0x0660 ) // arabic-indic digits
+            value = c - 0x0660;
+        result = result * 10 + value;   // append as the next base-10 digit; no overflow check is made
+    }
+    return result;
+}
+
 using namespace ::com::sun::star;
 
 // convert between sequence of string and comma separated string
@@ -177,6 +274,7 @@ sal_Int32 compareNatural( const ::rtl::OUString & rLHS, const ::rtl::OUString &
         //To-Do: Possibly scale down those unicode codepoints that relate to
         //numbers outside of the normal 0-9 range, e.g. see GetLocalizedChar in
         //vcl
+
         sal_Int32 nLHS = rLHS.copy(nLHSFirstDigitPos, nLHSChunkLen).toInt32();
         sal_Int32 nRHS = rRHS.copy(nRHSFirstDigitPos, nRHSChunkLen).toInt32();
 


More information about the Libreoffice-commits mailing list