[Libreoffice-commits] .: i18npool/qa i18npool/source

Caolán McNamara caolan at kemper.freedesktop.org
Wed Jul 25 03:41:48 PDT 2012


 i18npool/qa/cppunit/test_breakiterator.cxx          |   38 +++++++++
 i18npool/source/breakiterator/data/README           |   80 --------------------
 i18npool/source/breakiterator/data/dict_word_ca.txt |   21 +++--
 3 files changed, 54 insertions(+), 85 deletions(-)

New commits:
commit 071a0dc02c90e939a23f99af1f9c4c65982bdd7d
Author: Caolán McNamara <caolanm at redhat.com>
Date:   Wed Jul 25 11:37:58 2012 +0100

    Related: #i85411# Catalan word breaking rules out of sync with ZWSP
    
    I can see no reason to have specific Catalan rules; the old examples
    work fine with the default rules.
    
    Change-Id: Ifacb7b46204d8aed543ab0c77fe80d1d5c5de738

diff --git a/i18npool/qa/cppunit/test_breakiterator.cxx b/i18npool/qa/cppunit/test_breakiterator.cxx
index 403c71b..b04dc53 100644
--- a/i18npool/qa/cppunit/test_breakiterator.cxx
+++ b/i18npool/qa/cppunit/test_breakiterator.cxx
@@ -397,6 +397,44 @@ void TestBreakIterator::testWordBoundaries()
         while (nPos++ < aTest.getLength());
         CPPUNIT_ASSERT(i == SAL_N_ELEMENTS(aExpected));
     }
+
+    //See https://issues.apache.org/ooo/show_bug.cgi?id=85411
+    for (int j = 0; j < 2; ++j)
+    {
+        switch (j)
+        {
+            case 0:
+                aLocale.Language = rtl::OUString("en");
+                aLocale.Country = rtl::OUString("US");
+                break;
+            case 1:
+                aLocale.Language = rtl::OUString("ca");
+                aLocale.Country = rtl::OUString("ES");
+                break;
+            default:
+                CPPUNIT_ASSERT(false);
+                break;
+        }
+
+        const sal_Unicode TEST[] =
+        {
+            'I', 0x200B, 'w', 'a', 'n', 't', 0x200B, 't', 'o', 0x200B, 'g', 'o'
+        };
+        rtl::OUString aTest(TEST, SAL_N_ELEMENTS(TEST));
+
+        sal_Int32 nPos = 0;
+        sal_Int32 aExpected[] = {1, 6, 9, 12};
+        size_t i = 0;
+        do
+        {
+            CPPUNIT_ASSERT(i < SAL_N_ELEMENTS(aExpected));
+            nPos = m_xBreak->getWordBoundary(aTest, nPos, aLocale,
+                i18n::WordType::DICTIONARY_WORD, true).endPos;
+            CPPUNIT_ASSERT(aExpected[i++] == nPos);
+        }
+        while (nPos++ < aTest.getLength());
+        CPPUNIT_ASSERT(i == SAL_N_ELEMENTS(aExpected));
+    }
 }
 
 //See http://qa.openoffice.org/issues/show_bug.cgi?id=111152
diff --git a/i18npool/source/breakiterator/data/README b/i18npool/source/breakiterator/data/README
index cf74ec4..b058a2d 100644
--- a/i18npool/source/breakiterator/data/README
+++ b/i18npool/source/breakiterator/data/README
@@ -69,85 +69,6 @@ Date:   Thu Oct 2 13:51:29 2008 +0000
 
     #i80412#
 
-commit 672a654fa6b447df0397942c1fa6594bb63264b9
-Author: Kurt Zenker <kz at openoffice.org>
-Date:   Thu Aug 14 15:31:04 2008 +0000
-
-    INTEGRATION: CWS i18n44 (1.2.132); FILE MERGED
-    2008/07/23 23:07:46 khong 1.2.132.1: #i85411# Apply patch for ZWSP
-
-commit c75401da0c36bb518c41971d07660010ec745dd0
-Author: Kurt Zenker <kz at openoffice.org>
-Date:   Thu Aug 14 15:30:52 2008 +0000
-
-    INTEGRATION: CWS i18n44 (1.2.230); FILE MERGED
-    2008/07/23 23:07:46 khong 1.2.230.1: #i85411# Apply patch for ZWSP
-
-commit 43f49bd7d04fcc64941b5576a804f1b8bab76423
-Author: Kurt Zenker <kz at openoffice.org>
-Date:   Thu Aug 14 15:30:39 2008 +0000
-
-    INTEGRATION: CWS i18n44 (1.3.314); FILE MERGED
-    2008/07/23 23:07:46 khong 1.3.314.1: #i85411# Apply patch for ZWSP
-
-commit 8c4bc258ab77b586325a868d75094b1e041bd57e
-Author: Kurt Zenker <kz at openoffice.org>
-Date:   Thu Aug 14 15:30:26 2008 +0000
-
-    INTEGRATION: CWS i18n44 (1.5.214); FILE MERGED
-    2008/07/23 23:07:45 khong 1.5.214.1: #i85411# Apply patch for ZWSP
-
-commit 0c008c4b9b1957fffb62175a31a7085f98afbd6a
-Author: Kurt Zenker <kz at openoffice.org>
-Date:   Thu Aug 14 15:30:05 2008 +0000
-
-    INTEGRATION: CWS i18n44 (1.3.214); FILE MERGED
-    2008/07/23 23:07:45 khong 1.3.214.1: #i85411# Apply patch for ZWSP
-
-commit 01a7b977a133a910845c7226f36640f2edaf2ce9
-Author: Kurt Zenker <kz at openoffice.org>
-Date:   Thu Aug 14 15:29:53 2008 +0000
-
-    INTEGRATION: CWS i18n44 (1.2.184); FILE MERGED
-    2008/07/23 23:07:45 khong 1.2.184.1: #i85411# Apply patch for ZWSP
-
-commit 77cd396b673caa67dc1d56ecf44ee5f619244e77
-Author: Kurt Zenker <kz at openoffice.org>
-Date:   Thu Aug 14 15:29:40 2008 +0000
-
-    INTEGRATION: CWS i18n44 (1.2.114); FILE MERGED
-    2008/07/23 23:07:45 khong 1.2.114.1: #i85411# Apply patch for ZWSP
-
-commit 1e8949e19eb5f63504ab634c9a3e55b4b48484e0
-Author: Kurt Zenker <kz at openoffice.org>
-Date:   Thu Aug 14 15:29:27 2008 +0000
-
-    INTEGRATION: CWS i18n44 (1.4.214); FILE MERGED
-    2008/07/23 23:07:45 khong 1.4.214.1: #i85411# Apply patch for ZWSP
-
-commit 601733f145bf518eec4d29c2319c1f61ebd83d96
-Author: Kurt Zenker <kz at openoffice.org>
-Date:   Thu Aug 14 15:29:14 2008 +0000
-
-    INTEGRATION: CWS i18n44 (1.5.214); FILE MERGED
-    2008/07/23 23:07:45 khong 1.5.214.2: #i85411# Apply patch for ZWSP
-    2008/07/23 07:35:04 khong 1.5.214.1: #i85411# Apply patch for ZWSP
-
-commit 744a220b2950f488c50e7380fd45232e24921438
-Author: Kurt Zenker <kz at openoffice.org>
-Date:   Thu Aug 14 15:28:18 2008 +0000
-
-    INTEGRATION: CWS i18n44 (1.3.18); FILE MERGED
-    2008/07/23 23:07:45 khong 1.3.18.1: #i85411# Apply patch for ZWSP
-
-commit 8ead581613efb4ecd6121a195e04c4f5a7bc8bf1
-Author: Kurt Zenker <kz at openoffice.org>
-Date:   Thu Aug 14 15:27:36 2008 +0000
-
-    INTEGRATION: CWS i18n44 (1.27.6); FILE MERGED
-    2008/07/24 16:12:44 khong 1.27.6.2: #i85411# Apply patch for ZWSP
-    2008/07/23 23:07:44 khong 1.27.6.1: #i85411# Apply patch for ZWSP
-
 commit 9964a76ef58786bba47d409970512d7ded6c8889
 Author: Rüdiger Timm <rt at openoffice.org>
 Date:   Wed Jul 2 07:53:05 2008 +0000
@@ -700,6 +621,7 @@ Date:   Tue Jan 20 12:20:28 2004 +0000
 
 done, regression tests added:
 
+#i85411# Apply patch for ZWSP
 #i17155# fix line breakiterator rule to make slash and hyphen as part of word when doing line break
 #i13451# add '-' as midLetter for Catalan dictionary word breakiterator
 #i13494# fix word breakiterator rule to handle punctuations and signs correctly
diff --git a/i18npool/source/breakiterator/data/dict_word_ca.txt b/i18npool/source/breakiterator/data/dict_word_ca.txt
index 6ad6a0b..b1666f4 100644
--- a/i18npool/source/breakiterator/data/dict_word_ca.txt
+++ b/i18npool/source/breakiterator/data/dict_word_ca.txt
@@ -21,18 +21,24 @@ $Katakana  = [[:Script = KATAKANA:] [:name = KATAKANA-HIRAGANA PROLONGED SOUND M
                                    [:name = HALFWIDTH KATAKANA VOICED SOUND MARK:]
                                    [:name = HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK:]];
 
+$Ideographic = [:Ideographic:];
+$Hangul = [:Script = HANGUL:];
 
 $ALetter   = [[:Alphabetic:] [:name= COMMERCIAL AT:] [:name= HEBREW PUNCTUATION GERESH:]
+                           - $Ideographic
                            - $Katakana
+                           - $Hangul
                            - [:Script = Thai:]
                            - [:Script = Lao:]
                            - [:Script = Hiragana:]];
                            
-$MidLetter = [[:name = APOSTROPHE:] [:name = GRAVE ACCENT:] \u0084 [:name = SOFT HYPHEN:] [:name = MIDDLE DOT:] [:name = GREEK TONOS:] [:name= FULL STOP:]
+$MidLetter = [[:name = APOSTROPHE:] [:name = GRAVE ACCENT:] \u0084 [:name = SOFT HYPHEN:] [:name = MIDDLE DOT:] [:name = GREEK TONOS:] [:name= FULL STOP:] 
               [:name = HEBREW PUNCTUATION GERSHAYIM:] [:name = DOUBLE VERTICAL LINE:] [:name = LEFT SINGLE QUOTATION MARK:]
-              [:name = RIGHT SINGLE QUOTATION MARK:] [:name = HYPHENATION POINT:] [:name = PRIME:] [:name = HYPHEN-MINUS:]];  
-              
+              [:name = RIGHT SINGLE QUOTATION MARK:] [:name = HYPHENATION POINT:] [:name = PRIME:] 
+              [:name = HYPHEN-MINUS:] ];
+
 $SufixLetter = [:name= FULL STOP:];
+              
 
 $MidNum    = [[:LineBreak = Infix_Numeric:] [:name= COMMERCIAL AT:] \u0084 [:name = GREEK TONOS:] [:name = ARABIC DECIMAL SEPARATOR:]
              [:name = LEFT SINGLE QUOTATION MARK:] [:name = RIGHT SINGLE QUOTATION MARK:] [:name = SINGLE HIGH-REVERSED-9 QUOTATION MARK:]
@@ -60,7 +66,7 @@ $Extend     = [[:Grapheme_Extend = TRUE:]];
 #
 ####################################################################################
 
-$Format    = [[:Cf:]];
+$Format    = [[:Cf:] - $TheZWSP];
 
 
 
@@ -80,6 +86,8 @@ $MidNumEx     = $MidNum    $Extend*;
 $MidLetterEx  = $MidLetter $Extend*;
 $SufixLetterEx= $SufixLetter $Extend*;
 $KatakanaEx   = $Katakana  $Extend*;
+$IdeographicEx= $Ideographic  $Extend*;
+$HangulEx = $Hangul  $Extend*;
 $FormatEx     = $Format    $Extend*;
 
 
@@ -111,7 +119,8 @@ $KatakanaEx ($FormatEx* $KatakanaEx)* {300};
 #                           Separated from the "Everything Else" rule, below, only so that they
 #                           can be tagged with a return value.   TODO:  is this what we want?
 #
-# [:IDEOGRAPHIC:] $Extend* {400};
+$IdeographicEx ($FormatEx* $IdeographicEx)* {400};
+$HangulEx ($FormatEx* $HangulEx)* {400};
 
 #
 #  Everything Else, with no tag.
@@ -132,7 +141,7 @@ $CR $LF;
 #    reaches something that can only be the start (and probably only) char in a "word".
 #    A space or punctuation meets the test.
 #
-$NonStarters = [$Numeric $ALetter $Katakana [:P:] [:S:] $MidLetter $MidNum $SufixLetter $Extend $Format];
+$NonStarters = [$Numeric $ALetter $Katakana $Ideographic $Hangul [:P:] [:S:] $MidLetter $MidNum $SufixLetter $Extend $Format];
 
 #!.*;
 ! ($NonStarters* | \n \r) .;


More information about the Libreoffice-commits mailing list