[Libreoffice-commits] .: 3 commits - i18npool/qa i18npool/source

Caolán McNamara caolan at kemper.freedesktop.org
Tue Feb 14 08:10:58 PST 2012


 i18npool/qa/cppunit/test_breakiterator.cxx              |   35 +++++++++++++++-
 i18npool/source/breakiterator/breakiterator_unicode.cxx |    7 ++-
 2 files changed, 39 insertions(+), 3 deletions(-)

New commits:
commit 475d0c59c66fb7752d230f76130b17145aad0c12
Author: Caolán McNamara <caolanm at redhat.com>
Date:   Tue Feb 14 16:07:10 2012 +0000

    use ICU's break iterator for Thai, not our customized generic rules

diff --git a/i18npool/qa/cppunit/test_breakiterator.cxx b/i18npool/qa/cppunit/test_breakiterator.cxx
index ba000af..820e57b 100644
--- a/i18npool/qa/cppunit/test_breakiterator.cxx
+++ b/i18npool/qa/cppunit/test_breakiterator.cxx
@@ -70,7 +70,7 @@ public:
     CPPUNIT_TEST(testGraphemeIteration);
     CPPUNIT_TEST(testWeak);
     CPPUNIT_TEST(testAsian);
-//    CPPUNIT_TEST(testThai);
+    CPPUNIT_TEST(testThai);
     CPPUNIT_TEST_SUITE_END();
 
 private:
@@ -264,13 +264,13 @@ void TestBreakIterator::testThai()
     {
         const sal_Unicode THAI1[] = { 0x0E01, 0x0E38, 0x0E2B, 0x0E25, 0x0E32, 0x0E1A };
         ::rtl::OUString aTest(THAI1, SAL_N_ELEMENTS(THAI1));
-
         aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale,
             i18n::WordType::DICTIONARY_WORD, true);
         CPPUNIT_ASSERT_MESSAGE("Should skip full word",
             aBounds.startPos == 0 && aBounds.endPos == aTest.getLength());
     }
 
+#ifdef TODO
     {
         const sal_Unicode NORTHERN_THAI1[] = { 0x0E01, 0x0E38, 0x0E4A, 0x0E2B, 0x0E25, 0x0E32, 0x0E1A };
         ::rtl::OUString aTest(NORTHERN_THAI1, SAL_N_ELEMENTS(NORTHERN_THAI1));
@@ -279,6 +279,7 @@ void TestBreakIterator::testThai()
         CPPUNIT_ASSERT_MESSAGE("Should skip full word",
             aBounds.startPos == 0 && aBounds.endPos == aTest.getLength());
     }
+#endif
 }
 
 TestBreakIterator::TestBreakIterator()
diff --git a/i18npool/source/breakiterator/breakiterator_unicode.cxx b/i18npool/source/breakiterator/breakiterator_unicode.cxx
index 5f35795..aa5e1d8 100644
--- a/i18npool/source/breakiterator/breakiterator_unicode.cxx
+++ b/i18npool/source/breakiterator/breakiterator_unicode.cxx
@@ -129,10 +129,13 @@ void SAL_CALL BreakIterator_Unicode::loadICUBreakIterator(const com::sun::star::
 
             OOoRuleBasedBreakIterator *rbi = NULL;
 
-            if (breakRules.getLength() > breakType && !breakRules[breakType].isEmpty()) {
+            if (breakRules.getLength() > breakType && !breakRules[breakType].isEmpty())
+            {
                 rbi = new OOoRuleBasedBreakIterator(udata_open("OpenOffice", "brk",
                     OUStringToOString(breakRules[breakType], RTL_TEXTENCODING_ASCII_US).getStr(), &status), status);
-            } else {
+            }
+            else if (!rLocale.Language.equalsAsciiL(RTL_CONSTASCII_STRINGPARAM("th"))) //use icu's breakiterator for Thai
+            {
                 status = U_ZERO_ERROR;
                 OStringBuffer aUDName(64);
                 aUDName.append(rule);
commit fad63878e529ad775f4484330235242a7a900987
Author: Caolán McNamara <caolanm at redhat.com>
Date:   Tue Feb 14 14:14:23 2012 +0000

    add Northern Thai example

diff --git a/i18npool/qa/cppunit/test_breakiterator.cxx b/i18npool/qa/cppunit/test_breakiterator.cxx
index a68bc77..ba000af 100644
--- a/i18npool/qa/cppunit/test_breakiterator.cxx
+++ b/i18npool/qa/cppunit/test_breakiterator.cxx
@@ -260,14 +260,24 @@ void TestBreakIterator::testThai()
     aLocale.Language = ::rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("th"));
     aLocale.Country = ::rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("TH"));
 
+    i18n::Boundary aBounds;
     {
         const sal_Unicode THAI1[] = { 0x0E01, 0x0E38, 0x0E2B, 0x0E25, 0x0E32, 0x0E1A };
-        ::rtl::OUString aTest1(THAI1, SAL_N_ELEMENTS(THAI1));
+        ::rtl::OUString aTest(THAI1, SAL_N_ELEMENTS(THAI1));
 
-        i18n::Boundary aBounds = m_xBreak->getWordBoundary(aTest1, 0, aLocale,
+        aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale,
             i18n::WordType::DICTIONARY_WORD, true);
         CPPUNIT_ASSERT_MESSAGE("Should skip full word",
-            aBounds.startPos == 0 && aBounds.endPos == aTest1.getLength());
+            aBounds.startPos == 0 && aBounds.endPos == aTest.getLength());
+    }
+
+    {
+        const sal_Unicode NORTHERN_THAI1[] = { 0x0E01, 0x0E38, 0x0E4A, 0x0E2B, 0x0E25, 0x0E32, 0x0E1A };
+        ::rtl::OUString aTest(NORTHERN_THAI1, SAL_N_ELEMENTS(NORTHERN_THAI1));
+        aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale,
+            i18n::WordType::DICTIONARY_WORD, true);
+        CPPUNIT_ASSERT_MESSAGE("Should skip full word",
+            aBounds.startPos == 0 && aBounds.endPos == aTest.getLength());
     }
 }
 
commit 1a450f53dbd31a34718f658a81b0fabbbc329764
Author: Caolán McNamara <caolanm at redhat.com>
Date:   Tue Feb 14 14:09:12 2012 +0000

    add a unit test to demonstrate broken Thai word-boundary detection

diff --git a/i18npool/qa/cppunit/test_breakiterator.cxx b/i18npool/qa/cppunit/test_breakiterator.cxx
index 661e46a..a68bc77 100644
--- a/i18npool/qa/cppunit/test_breakiterator.cxx
+++ b/i18npool/qa/cppunit/test_breakiterator.cxx
@@ -41,7 +41,8 @@
 #include "cppunit/plugin/TestPlugIn.h"
 #include <com/sun/star/i18n/XBreakIterator.hpp>
 #include <com/sun/star/i18n/CharacterIteratorMode.hpp>
-#include <com/sun/star/i18n/ScriptType.hdl>
+#include <com/sun/star/i18n/ScriptType.hpp>
+#include <com/sun/star/i18n/WordType.hpp>
 
 #include <rtl/strbuf.hxx>
 
@@ -62,12 +63,14 @@ public:
     void testGraphemeIteration();
     void testWeak();
     void testAsian();
+    void testThai();
 
     CPPUNIT_TEST_SUITE(TestBreakIterator);
     CPPUNIT_TEST(testLineBreaking);
     CPPUNIT_TEST(testGraphemeIteration);
     CPPUNIT_TEST(testWeak);
     CPPUNIT_TEST(testAsian);
+//    CPPUNIT_TEST(testThai);
     CPPUNIT_TEST_SUITE_END();
 
 private:
@@ -249,6 +252,25 @@ void TestBreakIterator::testAsian()
     }
 }
 
+//A test to ensure that our thai word boundary detection is useful
+//http://lists.freedesktop.org/archives/libreoffice/2012-February/025959.html
+void TestBreakIterator::testThai()
+{
+    lang::Locale aLocale;
+    aLocale.Language = ::rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("th"));
+    aLocale.Country = ::rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("TH"));
+
+    {
+        const sal_Unicode THAI1[] = { 0x0E01, 0x0E38, 0x0E2B, 0x0E25, 0x0E32, 0x0E1A };
+        ::rtl::OUString aTest1(THAI1, SAL_N_ELEMENTS(THAI1));
+
+        i18n::Boundary aBounds = m_xBreak->getWordBoundary(aTest1, 0, aLocale,
+            i18n::WordType::DICTIONARY_WORD, true);
+        CPPUNIT_ASSERT_MESSAGE("Should skip full word",
+            aBounds.startPos == 0 && aBounds.endPos == aTest1.getLength());
+    }
+}
+
 TestBreakIterator::TestBreakIterator()
 {
     m_xContext = cppu::defaultBootstrap_InitialComponentContext();


More information about the Libreoffice-commits mailing list