[Libreoffice-commits] core.git: Branch 'libreoffice-5-3' - external/hunspell
László Németh
laszlo.nemeth at collabora.com
Mon Mar 27 08:39:56 UTC 2017
external/hunspell/0002-fix-other-regression-in-compounding.patch | 43 ++++++
external/hunspell/0005-fix-syllable-counting-in-compound-word-handling.patch | 66 ++++++++++
external/hunspell/UnpackedTarball_hunspell.mk | 2
3 files changed, 111 insertions(+)
New commits:
commit 23c028ba014f521bf1b70c83439e0915e56c296f
Author: László Németh <laszlo.nemeth at collabora.com>
Date: Fri Mar 24 15:26:49 2017 +0100
tdf#106751 fix regressions in Hungarian spell checking
using recent fixes from the Hunspell code base
Change-Id: I180a2ecba924180419c5eb1a0e78b5c84e7242c4
Reviewed-on: https://gerrit.libreoffice.org/35670
Tested-by: Jenkins <ci at libreoffice.org>
Tested-by: László Németh <nemeth at numbertext.org>
Reviewed-by: László Németh <nemeth at numbertext.org>
Reviewed-by: Andras Timar <andras.timar at collabora.com>
diff --git a/external/hunspell/0002-fix-other-regression-in-compounding.patch b/external/hunspell/0002-fix-other-regression-in-compounding.patch
new file mode 100644
index 000000000000..cbf29e54f93a
--- /dev/null
+++ b/external/hunspell/0002-fix-other-regression-in-compounding.patch
@@ -0,0 +1,43 @@
+From 1fada01663b29b57c010a9c274e45a5cf9ecf222 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?L=C3=A1szl=C3=B3=20N=C3=A9meth?=
+ <laszlo.nemeth at collabora.com>
+Date: Sun, 19 Mar 2017 13:19:29 +0100
+Subject: [PATCH 2/7] fix other regression in compounding
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Allow compound words again that start with
+"kor", "alak", "asszony", "úr"
+(related to the "REP kor _kor" etc. rules)
+when using the Hungarian spelling dictionary.
+
+regression from...
+
+commit 73b1cad1af7ab94252f75784fa6724cf062a6966
+Author: Martin Hosken <martin_hosken at sil.org>
+Date: Mon Apr 18 16:28:26 2016 +0700
+
+ Add support for bounded conversion
+---
+ src/hunspell/affixmgr.cxx | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/src/hunspell/affixmgr.cxx b/src/hunspell/affixmgr.cxx
+index 78c70e7..ec2093d 100644
+--- a/src/hunspell/affixmgr.cxx
++++ b/src/hunspell/affixmgr.cxx
+@@ -1290,8 +1290,8 @@ int AffixMgr::cpdrep_check(const char* word, int wl) {
+ // search every occurence of the pattern in the word
+ while ((r = strstr(r, reptable[i].pattern.c_str())) != NULL) {
+ std::string candidate(word);
+- size_t type = r == word ? 1 : 0;
+- if (r - word + reptable[i].pattern.size() == lenp)
++ size_t type = r == word && langnum != LANG_hu ? 1 : 0;
++ if (r - word + reptable[i].pattern.size() == lenp && langnum != LANG_hu)
+ type += 2;
+ candidate.replace(r - word, lenp, reptable[i].outstrings[type]);
+ if (candidate_check(candidate.c_str(), candidate.size()))
+--
+2.7.4
+
diff --git a/external/hunspell/0005-fix-syllable-counting-in-compound-word-handling.patch b/external/hunspell/0005-fix-syllable-counting-in-compound-word-handling.patch
new file mode 100644
index 000000000000..670d938e5441
--- /dev/null
+++ b/external/hunspell/0005-fix-syllable-counting-in-compound-word-handling.patch
@@ -0,0 +1,66 @@
+From f4ec6a283f972c82d068f4472320d424c40d45cb Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?L=C3=A1szl=C3=B3=20N=C3=A9meth?=
+ <laszlo.nemeth at collabora.com>
+Date: Thu, 23 Mar 2017 16:40:52 +0100
+Subject: [PATCH 5/7] fix syllable counting in compound word handling
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Note: one of the fixed regressions is related to an old
+hidden mistake: using clen instead of blen for the stem
+word lengths made no difference with the original get_syllable(),
+because blen == clen in 8-bit encodings, and UTF-8
+words were handled by null-termination. When Unicode support
+was implemented in Hunspell, clen was changed to blen only in
+compound_check_morph(), but accidentally not
+in compound_check(), resulting in problems after the
+recent std::string conversion.
+
+Now this commit is a real fix for the regression from the
+commit c63c93237e4decdba5544a96093448605ac549c2,
+instead of the following bad fix:
+
+commit d06b0c57ae87ee8743f1bf53f80c1f8e364db619
+Author: László Németh <laszlo.nemeth at collabora.com>
+Date: Fri Mar 17 15:11:23 2017 +0100
+
+ fix Hungarian compound word handling
+---
+ src/hunspell/affixmgr.cxx | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/src/hunspell/affixmgr.cxx b/src/hunspell/affixmgr.cxx
+index 2ed8233..3d65539 100644
+--- a/src/hunspell/affixmgr.cxx
++++ b/src/hunspell/affixmgr.cxx
+@@ -1816,7 +1816,7 @@ struct hentry* AffixMgr::compound_check(const std::string& word,
+ // LANG_hu section: spec. Hungarian rule
+ if (langnum == LANG_hu) {
+ // calculate syllable number of the word
+- numsyllable += get_syllable(st.substr(i));
++ numsyllable += get_syllable(st.substr(0, i));
+ // + 1 word, if syllable number of the prefix > 1 (hungarian
+ // convention)
+ if (pfx && (get_syllable(pfx->getKey()) > 1))
+@@ -1901,7 +1901,7 @@ struct hentry* AffixMgr::compound_check(const std::string& word,
+ (compoundend && TESTAFF(rv->astr, compoundend, rv->alen))) &&
+ (((cpdwordmax == -1) || (wordnum + 1 < cpdwordmax)) ||
+ ((cpdmaxsyllable != 0) &&
+- (numsyllable + get_syllable(std::string(HENTRY_WORD(rv), rv->clen)) <=
++ (numsyllable + get_syllable(std::string(HENTRY_WORD(rv), rv->blen)) <=
+ cpdmaxsyllable))) &&
+ (
+ // test CHECKCOMPOUNDPATTERN
+@@ -2382,7 +2382,7 @@ int AffixMgr::compound_check_morph(const char* word,
+ // LANG_hu section: spec. Hungarian rule
+ if (langnum == LANG_hu) {
+ // calculate syllable number of the word
+- numsyllable += get_syllable(st.substr(i));
++ numsyllable += get_syllable(st.substr(0, i));
+
+ // + 1 word, if syllable number of the prefix > 1 (hungarian
+ // convention)
+--
+2.7.4
+
diff --git a/external/hunspell/UnpackedTarball_hunspell.mk b/external/hunspell/UnpackedTarball_hunspell.mk
index b30bd083b407..40a4a101a8f0 100644
--- a/external/hunspell/UnpackedTarball_hunspell.mk
+++ b/external/hunspell/UnpackedTarball_hunspell.mk
@@ -28,6 +28,8 @@ $(eval $(call gb_UnpackedTarball_add_patches,hunspell, \
external/hunspell/0002-add-a-get_clen_and_captype-varient-that-takes-a-buff.patch \
external/hunspell/0003-hoist-string-lowering-from-ngram-to-ngsuggest.patch \
external/hunspell/0004-either-clear-will-be-called-anyway-before-use-or-its.patch \
+ external/hunspell/0002-fix-other-regression-in-compounding.patch \
+ external/hunspell/0005-fix-syllable-counting-in-compound-word-handling.patch \
))
# vim: set noet sw=4 ts=4:
More information about the Libreoffice-commits
mailing list