[Libreoffice-commits] core.git: Branch 'libreoffice-5-3' - external/hunspell

László Németh laszlo.nemeth at collabora.com
Mon Mar 27 08:39:56 UTC 2017


 external/hunspell/0002-fix-other-regression-in-compounding.patch             |   43 ++++++
 external/hunspell/0005-fix-syllable-counting-in-compound-word-handling.patch |   66 ++++++++++
 external/hunspell/UnpackedTarball_hunspell.mk                                |    2 
 3 files changed, 111 insertions(+)

New commits:
commit 23c028ba014f521bf1b70c83439e0915e56c296f
Author: László Németh <laszlo.nemeth at collabora.com>
Date:   Fri Mar 24 15:26:49 2017 +0100

    tdf#106751 fix regressions in Hungarian spell checking
    
    using recent fixes of Hunspell code base
    
    Change-Id: I180a2ecba924180419c5eb1a0e78b5c84e7242c4
    Reviewed-on: https://gerrit.libreoffice.org/35670
    Tested-by: Jenkins <ci at libreoffice.org>
    Tested-by: László Németh <nemeth at numbertext.org>
    Reviewed-by: László Németh <nemeth at numbertext.org>
    Reviewed-by: Andras Timar <andras.timar at collabora.com>

diff --git a/external/hunspell/0002-fix-other-regression-in-compounding.patch b/external/hunspell/0002-fix-other-regression-in-compounding.patch
new file mode 100644
index 000000000000..cbf29e54f93a
--- /dev/null
+++ b/external/hunspell/0002-fix-other-regression-in-compounding.patch
@@ -0,0 +1,43 @@
+From 1fada01663b29b57c010a9c274e45a5cf9ecf222 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?L=C3=A1szl=C3=B3=20N=C3=A9meth?=
+ <laszlo.nemeth at collabora.com>
+Date: Sun, 19 Mar 2017 13:19:29 +0100
+Subject: [PATCH 2/7] fix other regression in compounding
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Allow compound words starting with
+"kor", "alak", "asszony", "úr" again
+(related to the "REP kor _kor" etc. rules)
+when using the Hungarian spelling dictionary.
+
+regression from...
+
+commit 73b1cad1af7ab94252f75784fa6724cf062a6966
+Author: Martin Hosken <martin_hosken at sil.org>
+Date:   Mon Apr 18 16:28:26 2016 +0700
+
+    Add support for bounded conversion
+---
+ src/hunspell/affixmgr.cxx | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/src/hunspell/affixmgr.cxx b/src/hunspell/affixmgr.cxx
+index 78c70e7..ec2093d 100644
+--- a/src/hunspell/affixmgr.cxx
++++ b/src/hunspell/affixmgr.cxx
+@@ -1290,8 +1290,8 @@ int AffixMgr::cpdrep_check(const char* word, int wl) {
+     // search every occurence of the pattern in the word
+     while ((r = strstr(r, reptable[i].pattern.c_str())) != NULL) {
+       std::string candidate(word);
+-      size_t type = r == word ? 1 : 0;
+-      if (r - word + reptable[i].pattern.size() == lenp)
++      size_t type = r == word && langnum != LANG_hu ? 1 : 0;
++      if (r - word + reptable[i].pattern.size() == lenp && langnum != LANG_hu)
+         type += 2;
+       candidate.replace(r - word, lenp, reptable[i].outstrings[type]);
+       if (candidate_check(candidate.c_str(), candidate.size()))
+-- 
+2.7.4
+
diff --git a/external/hunspell/0005-fix-syllable-counting-in-compound-word-handling.patch b/external/hunspell/0005-fix-syllable-counting-in-compound-word-handling.patch
new file mode 100644
index 000000000000..670d938e5441
--- /dev/null
+++ b/external/hunspell/0005-fix-syllable-counting-in-compound-word-handling.patch
@@ -0,0 +1,66 @@
+From f4ec6a283f972c82d068f4472320d424c40d45cb Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?L=C3=A1szl=C3=B3=20N=C3=A9meth?=
+ <laszlo.nemeth at collabora.com>
+Date: Thu, 23 Mar 2017 16:40:52 +0100
+Subject: [PATCH 5/7] fix syllable counting in compound word handling
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Note: one of the fixed regressions is related to an old
+hidden mistake: using clen instead of blen for the stem
+word length made no difference with the original get_syllable(),
+because blen == clen in 8-bit encodings, and UTF-8
+words were handled by null-termination. When Unicode support
+was implemented in Hunspell, clen was accidentally changed to
+blen only in compound_check_morph(), but not
+in compound_check(), resulting in problems since the
+recent std::string conversion.
+
+This commit is the real fix for the regression from
+commit c63c93237e4decdba5544a96093448605ac549c2,
+instead of the following bad fix:
+
+commit d06b0c57ae87ee8743f1bf53f80c1f8e364db619
+Author: László Németh <laszlo.nemeth at collabora.com>
+Date:   Fri Mar 17 15:11:23 2017 +0100
+
+    fix Hungarian compound word handling
+---
+ src/hunspell/affixmgr.cxx | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/src/hunspell/affixmgr.cxx b/src/hunspell/affixmgr.cxx
+index 2ed8233..3d65539 100644
+--- a/src/hunspell/affixmgr.cxx
++++ b/src/hunspell/affixmgr.cxx
+@@ -1816,7 +1816,7 @@ struct hentry* AffixMgr::compound_check(const std::string& word,
+           // LANG_hu section: spec. Hungarian rule
+           if (langnum == LANG_hu) {
+             // calculate syllable number of the word
+-            numsyllable += get_syllable(st.substr(i));
++            numsyllable += get_syllable(st.substr(0, i));
+             // + 1 word, if syllable number of the prefix > 1 (hungarian
+             // convention)
+             if (pfx && (get_syllable(pfx->getKey()) > 1))
+@@ -1901,7 +1901,7 @@ struct hentry* AffixMgr::compound_check(const std::string& word,
+                  (compoundend && TESTAFF(rv->astr, compoundend, rv->alen))) &&
+                 (((cpdwordmax == -1) || (wordnum + 1 < cpdwordmax)) ||
+                  ((cpdmaxsyllable != 0) &&
+-                  (numsyllable + get_syllable(std::string(HENTRY_WORD(rv), rv->clen)) <=
++                  (numsyllable + get_syllable(std::string(HENTRY_WORD(rv), rv->blen)) <=
+                    cpdmaxsyllable))) &&
+                 (
+                     // test CHECKCOMPOUNDPATTERN
+@@ -2382,7 +2382,7 @@ int AffixMgr::compound_check_morph(const char* word,
+         // LANG_hu section: spec. Hungarian rule
+         if (langnum == LANG_hu) {
+           // calculate syllable number of the word
+-          numsyllable += get_syllable(st.substr(i));
++          numsyllable += get_syllable(st.substr(0, i));
+ 
+           // + 1 word, if syllable number of the prefix > 1 (hungarian
+           // convention)
+-- 
+2.7.4
+
diff --git a/external/hunspell/UnpackedTarball_hunspell.mk b/external/hunspell/UnpackedTarball_hunspell.mk
index b30bd083b407..40a4a101a8f0 100644
--- a/external/hunspell/UnpackedTarball_hunspell.mk
+++ b/external/hunspell/UnpackedTarball_hunspell.mk
@@ -28,6 +28,8 @@ $(eval $(call gb_UnpackedTarball_add_patches,hunspell, \
 	external/hunspell/0002-add-a-get_clen_and_captype-varient-that-takes-a-buff.patch \
 	external/hunspell/0003-hoist-string-lowering-from-ngram-to-ngsuggest.patch \
 	external/hunspell/0004-either-clear-will-be-called-anyway-before-use-or-its.patch \
+	external/hunspell/0002-fix-other-regression-in-compounding.patch \
+	external/hunspell/0005-fix-syllable-counting-in-compound-word-handling.patch \
 ))
 
 # vim: set noet sw=4 ts=4:


More information about the Libreoffice-commits mailing list