[Libreoffice-commits] core.git: download.lst external/hunspell
Caolán McNamara
caolanm at redhat.com
Thu Sep 21 12:31:11 UTC 2017
download.lst | 4
external/hunspell/0001-cppcheck-redundant-c_str.patch | 34 -
external/hunspell/0001-cppcheck-rv-is-reassigned-before-old-value-used.patch | 57 --
external/hunspell/0001-loop-via-iterators.patch | 36 -
external/hunspell/0001-unroll-this-a-bit.patch | 116 ----
external/hunspell/0002-add-a-get_clen_and_captype-varient-that-takes-a-buff.patch | 78 --
external/hunspell/0002-fix-other-regression-in-compounding.patch | 43 -
external/hunspell/0003-hoist-string-lowering-from-ngram-to-ngsuggest.patch | 264 ----------
external/hunspell/0004-either-clear-will-be-called-anyway-before-use-or-its.patch | 81 ---
external/hunspell/0005-fix-syllable-counting-in-compound-word-handling.patch | 66 --
external/hunspell/UnpackedTarball_hunspell.mk | 9
11 files changed, 2 insertions(+), 786 deletions(-)
New commits:
commit 917bcea4b4660c516bb18691e4f8ee60313804ef
Author: Caolán McNamara <caolanm at redhat.com>
Date: Wed Sep 20 16:40:39 2017 +0100
bump hunspell to 1.6.2
Change-Id: I91d4d58f2b8ba69067de1d08476a8cebbb780535
Reviewed-on: https://gerrit.libreoffice.org/42555
Tested-by: Jenkins <ci at libreoffice.org>
Reviewed-by: Caolán McNamara <caolanm at redhat.com>
Tested-by: Caolán McNamara <caolanm at redhat.com>
diff --git a/download.lst b/download.lst
index dd89c4a73008..08fe31d337e5 100644
--- a/download.lst
+++ b/download.lst
@@ -87,8 +87,8 @@ export HARFBUZZ_SHA256SUM := ccec4930ff0bb2d0c40aee203075447954b64a8c2695202413c
export HARFBUZZ_TARBALL := harfbuzz-1.4.8.tar.bz2
export HSQLDB_SHA256SUM := d30b13f4ba2e3b6a2d4f020c0dee0a9fb9fc6fbcc2d561f36b78da4bf3802370
export HSQLDB_TARBALL := 17410483b5b5f267aa18b7e00b65e6e0-hsqldb_1_8_0.zip
-export HUNSPELL_SHA256SUM := 512e7d2ee69dad0b35ca011076405e56e0f10963a02d4859dbcc4faf53ca68e2
-export HUNSPELL_TARBALL := 047c3feb121261b76dc16cdb62f54483-hunspell-1.6.0.tar.gz
+export HUNSPELL_SHA256SUM := 3cd9ceb062fe5814f668e4f22b2fa6e3ba0b339b921739541ce180cac4d6f4c4
+export HUNSPELL_TARBALL := hunspell-1.6.2.tar.gz
export HYPHEN_SHA256SUM := 304636d4eccd81a14b6914d07b84c79ebb815288c76fe027b9ebff6ff24d5705
export HYPHEN_TARBALL := 5ade6ae2a99bc1e9e57031ca88d36dad-hyphen-2.8.8.tar.gz
export ICU_SHA256SUM := 7132fdaf9379429d004005217f10e00b7d2319d0fea22bdfddef8991c45b75fe
diff --git a/external/hunspell/0001-cppcheck-redundant-c_str.patch b/external/hunspell/0001-cppcheck-redundant-c_str.patch
deleted file mode 100644
index 276ddd2e705b..000000000000
--- a/external/hunspell/0001-cppcheck-redundant-c_str.patch
+++ /dev/null
@@ -1,34 +0,0 @@
-From 9a0baf202f67291eaf482f1bcf654e21d71943e2 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm at redhat.com>
-Date: Mon, 23 Jan 2017 11:43:53 +0000
-Subject: [PATCH] cppcheck: redundant c_str
-
----
- src/hunspell/suggestmgr.cxx | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/src/hunspell/suggestmgr.cxx b/src/hunspell/suggestmgr.cxx
-index b998341..8d46dd6 100644
---- a/src/hunspell/suggestmgr.cxx
-+++ b/src/hunspell/suggestmgr.cxx
-@@ -1107,7 +1107,7 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
- int sc2;
- if (utf8) {
- w_f.clear();
-- u8_u16(w_f, f.c_str());
-+ u8_u16(w_f, f);
- sc2 = ngram(3, w_word, w_f, NGRAM_LONGER_WORSE + low) +
- leftcommonsubstring(w_word, w_f);
- } else {
-@@ -1132,7 +1132,7 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
- std::string target2 = phonet(candidate, *ph);
- w_target2.clear();
- if (utf8) {
-- u8_u16(w_target2, target2.c_str());
-+ u8_u16(w_target2, target2);
- scphon = 2 * ngram(3, w_target, w_target2,
- NGRAM_LONGER_WORSE);
- } else {
---
-2.9.3
-
diff --git a/external/hunspell/0001-cppcheck-rv-is-reassigned-before-old-value-used.patch b/external/hunspell/0001-cppcheck-rv-is-reassigned-before-old-value-used.patch
deleted file mode 100644
index bfcdf490a0a9..000000000000
--- a/external/hunspell/0001-cppcheck-rv-is-reassigned-before-old-value-used.patch
+++ /dev/null
@@ -1,57 +0,0 @@
-From 93156ba9a8e644f8b0b724880668714adcb0d094 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm at redhat.com>
-Date: Mon, 23 Jan 2017 12:05:07 +0000
-Subject: [PATCH] cppcheck: rv is reassigned before old value used
-
----
- src/hunspell/affixmgr.cxx | 6 ++----
- src/hunspell/suggestmgr.cxx | 3 +--
- 2 files changed, 3 insertions(+), 6 deletions(-)
-
-diff --git a/src/hunspell/affixmgr.cxx b/src/hunspell/affixmgr.cxx
-index 680cbe9..21cf384 100644
---- a/src/hunspell/affixmgr.cxx
-+++ b/src/hunspell/affixmgr.cxx
-@@ -1494,9 +1494,8 @@ int AffixMgr::defcpd_check(hentry*** words,
- }
-
- inline int AffixMgr::candidate_check(const char* word, int len) {
-- struct hentry* rv = NULL;
-
-- rv = lookup(word);
-+ struct hentry* rv = lookup(word);
- if (rv)
- return 1;
-
-@@ -3045,10 +3044,9 @@ struct hentry* AffixMgr::affix_check(const char* word,
- int len,
- const FLAG needflag,
- char in_compound) {
-- struct hentry* rv = NULL;
-
- // check all prefixes (also crossed with suffixes if allowed)
-- rv = prefix_check(word, len, in_compound, needflag);
-+ struct hentry* rv = prefix_check(word, len, in_compound, needflag);
- if (rv)
- return rv;
-
-diff --git a/src/hunspell/suggestmgr.cxx b/src/hunspell/suggestmgr.cxx
-index 8d46dd6..54a474f 100644
---- a/src/hunspell/suggestmgr.cxx
-+++ b/src/hunspell/suggestmgr.cxx
-@@ -1675,11 +1675,10 @@ std::string SuggestMgr::suggest_hentry_gen(hentry* rv, const char* pattern) {
- if (HENTRY_DATA(rv))
- p = (char*)strstr(HENTRY_DATA2(rv), MORPH_ALLOMORPH);
- while (p) {
-- struct hentry* rv2 = NULL;
- p += MORPH_TAG_LEN;
- int plen = fieldlen(p);
- std::string allomorph(p, plen);
-- rv2 = pAMgr->lookup(allomorph.c_str());
-+ struct hentry* rv2 = pAMgr->lookup(allomorph.c_str());
- while (rv2) {
- // if (HENTRY_DATA(rv2) && get_sfxcount(HENTRY_DATA(rv2)) <=
- // sfxcount) {
---
-2.9.3
-
diff --git a/external/hunspell/0001-loop-via-iterators.patch b/external/hunspell/0001-loop-via-iterators.patch
deleted file mode 100644
index 6ecdd769e3bf..000000000000
--- a/external/hunspell/0001-loop-via-iterators.patch
+++ /dev/null
@@ -1,36 +0,0 @@
-From f366e97fa8d7ad21060033b733dda15299edf7c5 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm at redhat.com>
-Date: Fri, 10 Feb 2017 15:37:11 +0000
-Subject: [PATCH 1/4] loop via iterators
-
----
- src/hunspell/csutil.cxx | 8 ++++++--
- 1 file changed, 6 insertions(+), 2 deletions(-)
-
-diff --git a/src/hunspell/csutil.cxx b/src/hunspell/csutil.cxx
-index c1666a5..2408677 100644
---- a/src/hunspell/csutil.cxx
-+++ b/src/hunspell/csutil.cxx
-@@ -2537,13 +2537,17 @@ int get_captype_utf8(const std::vector<w_char>& word, int langnum) {
- size_t ncap = 0;
- size_t nneutral = 0;
- size_t firstcap = 0;
-- for (size_t i = 0; i < word.size(); ++i) {
-- unsigned short idx = (word[i].h << 8) + word[i].l;
-+
-+ std::vector<w_char>::const_iterator it = word.begin();
-+ std::vector<w_char>::const_iterator it_end = word.end();
-+ while (it != it_end) {
-+ unsigned short idx = (it->h << 8) + it->l;
- unsigned short lwridx = unicodetolower(idx, langnum);
- if (idx != lwridx)
- ncap++;
- if (unicodetoupper(idx, langnum) == lwridx)
- nneutral++;
-+ ++it;
- }
- if (ncap) {
- unsigned short idx = (word[0].h << 8) + word[0].l;
---
-2.9.3
-
diff --git a/external/hunspell/0001-unroll-this-a-bit.patch b/external/hunspell/0001-unroll-this-a-bit.patch
deleted file mode 100644
index 607a51a5fd1b..000000000000
--- a/external/hunspell/0001-unroll-this-a-bit.patch
+++ /dev/null
@@ -1,116 +0,0 @@
-From bf05e232805f6c1fae5dea3c223de8bdaab425e9 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm at redhat.com>
-Date: Mon, 23 Jan 2017 13:26:53 +0000
-Subject: [PATCH 1/3] unroll this a bit
-
----
- src/hunspell/csutil.cxx | 49 ++++++++++++++++++++++++++++---------------------
- 1 file changed, 28 insertions(+), 21 deletions(-)
-
-diff --git a/src/hunspell/csutil.cxx b/src/hunspell/csutil.cxx
-index ac5cd98..c1666a5 100644
---- a/src/hunspell/csutil.cxx
-+++ b/src/hunspell/csutil.cxx
-@@ -518,18 +518,20 @@ unsigned char ccase(const struct cs_info* csconv, int nIndex) {
-
- w_char upper_utf(w_char u, int langnum) {
- unsigned short idx = (u.h << 8) + u.l;
-- if (idx != unicodetoupper(idx, langnum)) {
-- u.h = (unsigned char)(unicodetoupper(idx, langnum) >> 8);
-- u.l = (unsigned char)(unicodetoupper(idx, langnum) & 0x00FF);
-+ unsigned short upridx = unicodetoupper(idx, langnum);
-+ if (idx != upridx) {
-+ u.h = (unsigned char)(upridx >> 8);
-+ u.l = (unsigned char)(upridx & 0x00FF);
- }
- return u;
- }
-
- w_char lower_utf(w_char u, int langnum) {
- unsigned short idx = (u.h << 8) + u.l;
-- if (idx != unicodetolower(idx, langnum)) {
-- u.h = (unsigned char)(unicodetolower(idx, langnum) >> 8);
-- u.l = (unsigned char)(unicodetolower(idx, langnum) & 0x00FF);
-+ unsigned short lwridx = unicodetolower(idx, langnum);
-+ if (idx != lwridx) {
-+ u.h = (unsigned char)(lwridx >> 8);
-+ u.l = (unsigned char)(lwridx & 0x00FF);
- }
- return u;
- }
-@@ -551,12 +553,13 @@ std::string& mkallsmall(std::string& s, const struct cs_info* csconv) {
- }
-
- std::vector<w_char>& mkallsmall_utf(std::vector<w_char>& u,
-- int langnum) {
-+ int langnum) {
- for (size_t i = 0; i < u.size(); ++i) {
- unsigned short idx = (u[i].h << 8) + u[i].l;
-- if (idx != unicodetolower(idx, langnum)) {
-- u[i].h = (unsigned char)(unicodetolower(idx, langnum) >> 8);
-- u[i].l = (unsigned char)(unicodetolower(idx, langnum) & 0x00FF);
-+ unsigned short lwridx = unicodetolower(idx, langnum);
-+ if (idx != lwridx) {
-+ u[i].h = (unsigned char)(lwridx >> 8);
-+ u[i].l = (unsigned char)(lwridx & 0x00FF);
- }
- }
- return u;
-@@ -565,9 +568,10 @@ std::vector<w_char>& mkallsmall_utf(std::vector<w_char>& u,
- std::vector<w_char>& mkallcap_utf(std::vector<w_char>& u, int langnum) {
- for (size_t i = 0; i < u.size(); i++) {
- unsigned short idx = (u[i].h << 8) + u[i].l;
-- if (idx != unicodetoupper(idx, langnum)) {
-- u[i].h = (unsigned char)(unicodetoupper(idx, langnum) >> 8);
-- u[i].l = (unsigned char)(unicodetoupper(idx, langnum) & 0x00FF);
-+ unsigned short upridx = unicodetoupper(idx, langnum);
-+ if (idx != upridx) {
-+ u[i].h = (unsigned char)(upridx >> 8);
-+ u[i].l = (unsigned char)(upridx & 0x00FF);
- }
- }
- return u;
-@@ -583,9 +587,10 @@ std::string& mkinitcap(std::string& s, const struct cs_info* csconv) {
- std::vector<w_char>& mkinitcap_utf(std::vector<w_char>& u, int langnum) {
- if (!u.empty()) {
- unsigned short idx = (u[0].h << 8) + u[0].l;
-- if (idx != unicodetoupper(idx, langnum)) {
-- u[0].h = (unsigned char)(unicodetoupper(idx, langnum) >> 8);
-- u[0].l = (unsigned char)(unicodetoupper(idx, langnum) & 0x00FF);
-+ unsigned short upridx = unicodetoupper(idx, langnum);
-+ if (idx != upridx) {
-+ u[0].h = (unsigned char)(upridx >> 8);
-+ u[0].l = (unsigned char)(upridx & 0x00FF);
- }
- }
- return u;
-@@ -601,9 +606,10 @@ std::string& mkinitsmall(std::string& s, const struct cs_info* csconv) {
- std::vector<w_char>& mkinitsmall_utf(std::vector<w_char>& u, int langnum) {
- if (!u.empty()) {
- unsigned short idx = (u[0].h << 8) + u[0].l;
-- if (idx != unicodetolower(idx, langnum)) {
-- u[0].h = (unsigned char)(unicodetolower(idx, langnum) >> 8);
-- u[0].l = (unsigned char)(unicodetolower(idx, langnum) & 0x00FF);
-+ unsigned short lwridx = unicodetolower(idx, langnum);
-+ if (idx != lwridx) {
-+ u[0].h = (unsigned char)(lwridx >> 8);
-+ u[0].l = (unsigned char)(lwridx & 0x00FF);
- }
- }
- return u;
-@@ -2533,9 +2539,10 @@ int get_captype_utf8(const std::vector<w_char>& word, int langnum) {
- size_t firstcap = 0;
- for (size_t i = 0; i < word.size(); ++i) {
- unsigned short idx = (word[i].h << 8) + word[i].l;
-- if (idx != unicodetolower(idx, langnum))
-+ unsigned short lwridx = unicodetolower(idx, langnum);
-+ if (idx != lwridx)
- ncap++;
-- if (unicodetoupper(idx, langnum) == unicodetolower(idx, langnum))
-+ if (unicodetoupper(idx, langnum) == lwridx)
- nneutral++;
- }
- if (ncap) {
---
-2.9.3
-
diff --git a/external/hunspell/0002-add-a-get_clen_and_captype-varient-that-takes-a-buff.patch b/external/hunspell/0002-add-a-get_clen_and_captype-varient-that-takes-a-buff.patch
deleted file mode 100644
index 88695ec027d5..000000000000
--- a/external/hunspell/0002-add-a-get_clen_and_captype-varient-that-takes-a-buff.patch
+++ /dev/null
@@ -1,78 +0,0 @@
-From 8e957585671c76fa21e6265ec7b68aa19507f4fe Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm at redhat.com>
-Date: Fri, 10 Feb 2017 15:49:17 +0000
-Subject: [PATCH 2/4] add a get_clen_and_captype varient that takes a buffer
-
-kcachegrind reports 1,057,506,901 -> 830,529,143 on
-
-echo Hollo | valgrind --tool=callgrind ./src/tools/.libs/hunspell -d nl_NL
----
- src/hunspell/hashmgr.cxx | 16 +++++++++++-----
- src/hunspell/hashmgr.hxx | 1 +
- 2 files changed, 12 insertions(+), 5 deletions(-)
-
-diff --git a/src/hunspell/hashmgr.cxx b/src/hunspell/hashmgr.cxx
-index 1de1690..4844b49 100644
---- a/src/hunspell/hashmgr.cxx
-+++ b/src/hunspell/hashmgr.cxx
-@@ -363,12 +363,11 @@ int HashMgr::add_hidden_capitalized_word(const std::string& word,
- }
-
- // detect captype and modify word length for UTF-8 encoding
--int HashMgr::get_clen_and_captype(const std::string& word, int* captype) {
-+int HashMgr::get_clen_and_captype(const std::string& word, int* captype, std::vector<w_char> &workbuf) {
- int len;
- if (utf8) {
-- std::vector<w_char> dest_utf;
-- len = u8_u16(dest_utf, word);
-- *captype = get_captype_utf8(dest_utf, langnum);
-+ len = u8_u16(workbuf, word);
-+ *captype = get_captype_utf8(workbuf, langnum);
- } else {
- len = word.size();
- *captype = get_captype(word, csconv);
-@@ -376,6 +375,11 @@ int HashMgr::get_clen_and_captype(const std::string& word, int* captype) {
- return len;
- }
-
-+int HashMgr::get_clen_and_captype(const std::string& word, int* captype) {
-+ std::vector<w_char> workbuf;
-+ return get_clen_and_captype(word, captype, workbuf);
-+}
-+
- // remove word (personal dictionary function for standalone applications)
- int HashMgr::remove(const std::string& word) {
- struct hentry* dp = lookup(word.c_str());
-@@ -527,6 +531,8 @@ int HashMgr::load_tables(const char* tpath, const char* key) {
- // loop through all words on much list and add to hash
- // table and create word and affix strings
-
-+ std::vector<w_char> workbuf;
-+
- while (dict->getline(ts)) {
- mychomp(ts);
- // split each line into word and morphological description
-@@ -599,7 +605,7 @@ int HashMgr::load_tables(const char* tpath, const char* key) {
- }
-
- int captype;
-- int wcl = get_clen_and_captype(ts, &captype);
-+ int wcl = get_clen_and_captype(ts, &captype, workbuf);
- const std::string *dp_str = dp.empty() ? NULL : &dp;
- // add the word and its index plus its capitalized form optionally
- if (add_word(ts, wcl, flags, al, dp_str, false) ||
-diff --git a/src/hunspell/hashmgr.hxx b/src/hunspell/hashmgr.hxx
-index 812171a..5a09c45 100644
---- a/src/hunspell/hashmgr.hxx
-+++ b/src/hunspell/hashmgr.hxx
-@@ -125,6 +125,7 @@ class HashMgr {
-
- private:
- int get_clen_and_captype(const std::string& word, int* captype);
-+ int get_clen_and_captype(const std::string& word, int* captype, std::vector<w_char> &workbuf);
- int load_tables(const char* tpath, const char* key);
- int add_word(const std::string& word,
- int wcl,
---
-2.9.3
-
diff --git a/external/hunspell/0002-fix-other-regression-in-compounding.patch b/external/hunspell/0002-fix-other-regression-in-compounding.patch
deleted file mode 100644
index cbf29e54f93a..000000000000
--- a/external/hunspell/0002-fix-other-regression-in-compounding.patch
+++ /dev/null
@@ -1,43 +0,0 @@
-From 1fada01663b29b57c010a9c274e45a5cf9ecf222 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?L=C3=A1szl=C3=B3=20N=C3=A9meth?=
- <laszlo.nemeth at collabora.com>
-Date: Sun, 19 Mar 2017 13:19:29 +0100
-Subject: [PATCH 2/7] fix other regression in compounding
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Allow compound words again with
-starting "kor", "alak", "asszony", "úr"
-related to the "REP kor _kor" etc. rules
-using the Hungarian spelling dictionary.
-
-regression from...
-
-commit 73b1cad1af7ab94252f75784fa6724cf062a6966
-Author: Martin Hosken <martin_hosken at sil.org>
-Date: Mon Apr 18 16:28:26 2016 +0700
-
- Add support for bounded conversion
----
- src/hunspell/affixmgr.cxx | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/src/hunspell/affixmgr.cxx b/src/hunspell/affixmgr.cxx
-index 78c70e7..ec2093d 100644
---- a/src/hunspell/affixmgr.cxx
-+++ b/src/hunspell/affixmgr.cxx
-@@ -1290,8 +1290,8 @@ int AffixMgr::cpdrep_check(const char* word, int wl) {
- // search every occurence of the pattern in the word
- while ((r = strstr(r, reptable[i].pattern.c_str())) != NULL) {
- std::string candidate(word);
-- size_t type = r == word ? 1 : 0;
-- if (r - word + reptable[i].pattern.size() == lenp)
-+ size_t type = r == word && langnum != LANG_hu ? 1 : 0;
-+ if (r - word + reptable[i].pattern.size() == lenp && langnum != LANG_hu)
- type += 2;
- candidate.replace(r - word, lenp, reptable[i].outstrings[type]);
- if (candidate_check(candidate.c_str(), candidate.size()))
---
-2.7.4
-
diff --git a/external/hunspell/0003-hoist-string-lowering-from-ngram-to-ngsuggest.patch b/external/hunspell/0003-hoist-string-lowering-from-ngram-to-ngsuggest.patch
deleted file mode 100644
index ff2530cfe23d..000000000000
--- a/external/hunspell/0003-hoist-string-lowering-from-ngram-to-ngsuggest.patch
+++ /dev/null
@@ -1,264 +0,0 @@
-From cf0967951a25a2daa10a636092193af5c5497aa2 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm at redhat.com>
-Date: Fri, 10 Feb 2017 16:36:27 +0000
-Subject: [PATCH 3/4] hoist string lowering from ngram to ngsuggest
-
-only lower when we have to and reuse scratch buffers as
-tolower destination
-
-kcachegrind reports 830,529,143 -> 779,887,690 on
-
-echo Hollo | valgrind --tool=callgrind ./src/tools/.libs/hunspell -d nl_NL
----
- src/hunspell/suggestmgr.cxx | 143 +++++++++++++++++++++++++++++---------------
- 1 file changed, 95 insertions(+), 48 deletions(-)
-
-diff --git a/src/hunspell/suggestmgr.cxx b/src/hunspell/suggestmgr.cxx
-index 54a474f..ea52707 100644
---- a/src/hunspell/suggestmgr.cxx
-+++ b/src/hunspell/suggestmgr.cxx
-@@ -1075,10 +1075,8 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
- u8_u16(w_target, target);
- }
-
-- std::vector<w_char> w_entry;
- std::string f;
- std::vector<w_char> w_f;
-- std::vector<w_char> w_target2;
-
- for (size_t i = 0; i < rHMgr.size(); ++i) {
- while (0 != (hp = rHMgr[i]->walk_hashtable(col, hp))) {
-@@ -1091,13 +1089,24 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
- continue;
-
- if (utf8) {
-- w_entry.clear();
-- u8_u16(w_entry, HENTRY_WORD(hp));
-- sc = ngram(3, w_word, w_entry, NGRAM_LONGER_WORSE + low) +
-- leftcommonsubstring(w_word, w_entry);
-+ w_f.clear();
-+ u8_u16(w_f, HENTRY_WORD(hp));
-+
-+ int leftcommon = leftcommonsubstring(w_word, w_f);
-+ if (low) {
-+ // lowering dictionary word
-+ mkallsmall_utf(w_f, langnum);
-+ }
-+ sc = ngram(3, w_word, w_f, NGRAM_LONGER_WORSE) + leftcommon;
- } else {
-- sc = ngram(3, word, HENTRY_WORD(hp), NGRAM_LONGER_WORSE + low) +
-- leftcommonsubstring(word, HENTRY_WORD(hp));
-+ f.assign(HENTRY_WORD(hp));
-+
-+ int leftcommon = leftcommonsubstring(word, f.c_str());
-+ if (low) {
-+ // lowering dictionary word
-+ mkallsmall(f, csconv);
-+ }
-+ sc = ngram(3, word, f, NGRAM_LONGER_WORSE) + leftcommon;
- }
-
- // check special pronounciation
-@@ -1108,11 +1117,20 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
- if (utf8) {
- w_f.clear();
- u8_u16(w_f, f);
-- sc2 = ngram(3, w_word, w_f, NGRAM_LONGER_WORSE + low) +
-- leftcommonsubstring(w_word, w_f);
-+
-+ int leftcommon = leftcommonsubstring(w_word, w_f);
-+ if (low) {
-+ // lowering dictionary word
-+ mkallsmall_utf(w_f, langnum);
-+ }
-+ sc2 = ngram(3, w_word, w_f, NGRAM_LONGER_WORSE) + leftcommon;
- } else {
-- sc2 = ngram(3, word, f, NGRAM_LONGER_WORSE + low) +
-- leftcommonsubstring(word, f.c_str());
-+ int leftcommon = leftcommonsubstring(word, f.c_str());
-+ if (low) {
-+ // lowering dictionary word
-+ mkallsmall(f, csconv);
-+ }
-+ sc2 = ngram(3, word, f, NGRAM_LONGER_WORSE) + leftcommon;
- }
- if (sc2 > sc)
- sc = sc2;
-@@ -1129,14 +1147,14 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
- candidate = HENTRY_WORD(hp);
- mkallcap(candidate, csconv);
- }
-- std::string target2 = phonet(candidate, *ph);
-- w_target2.clear();
-+ f = phonet(candidate, *ph);
-+ w_f.clear();
- if (utf8) {
-- u8_u16(w_target2, target2);
-- scphon = 2 * ngram(3, w_target, w_target2,
-+ u8_u16(w_f, f);
-+ scphon = 2 * ngram(3, w_target, w_f,
- NGRAM_LONGER_WORSE);
- } else {
-- scphon = 2 * ngram(3, target, target2,
-+ scphon = 2 * ngram(3, target, f,
- NGRAM_LONGER_WORSE);
- }
- }
-@@ -1177,12 +1195,24 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
- w_mw[k].l = '*';
- w_mw[k].h = 0;
- }
-- thresh += ngram(n, w_word, w_mw, NGRAM_ANY_MISMATCH + low);
-+
-+ if (low) {
-+ // lowering dictionary word
-+ mkallsmall_utf(w_mw, langnum);
-+ }
-+
-+ thresh += ngram(n, w_word, w_mw, NGRAM_ANY_MISMATCH);
- } else {
- std::string mw = word;
- for (int k = sp; k < n; k += 4)
- mw[k] = '*';
-- thresh += ngram(n, word, mw, NGRAM_ANY_MISMATCH + low);
-+
-+ if (low) {
-+ // lowering dictionary word
-+ mkallsmall(mw, csconv);
-+ }
-+
-+ thresh += ngram(n, word, mw, NGRAM_ANY_MISMATCH);
- }
- }
- thresh = thresh / 3;
-@@ -1210,7 +1240,6 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
- return;
- }
-
-- std::vector<w_char> w_glst_word;
- for (int i = 0; i < MAX_ROOTS; i++) {
- if (roots[i]) {
- struct hentry* rp = roots[i];
-@@ -1225,15 +1254,26 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
-
- for (int k = 0; k < nw; k++) {
- if (utf8) {
-- w_glst_word.clear();
-- u8_u16(w_glst_word, glst[k].word);
-- sc = ngram(n, w_word, w_glst_word,
-- NGRAM_ANY_MISMATCH + low) +
-- leftcommonsubstring(w_word, w_glst_word);
-+ w_f.clear();
-+ u8_u16(w_f, glst[k].word);
-+
-+ int leftcommon = leftcommonsubstring(w_word, w_f);
-+ if (low) {
-+ // lowering dictionary word
-+ mkallsmall_utf(w_f, langnum);
-+ }
-+
-+ sc = ngram(n, w_word, w_f, NGRAM_ANY_MISMATCH) + leftcommon;
- } else {
-- sc = ngram(n, word, glst[k].word,
-- NGRAM_ANY_MISMATCH + low) +
-- leftcommonsubstring(word, glst[k].word);
-+ f = glst[k].word;
-+
-+ int leftcommon = leftcommonsubstring(word, f.c_str());
-+ if (low) {
-+ // lowering dictionary word
-+ mkallsmall(f, csconv);
-+ }
-+
-+ sc = ngram(n, word, f, NGRAM_ANY_MISMATCH) + leftcommon;
- }
-
- if (sc > thresh) {
-@@ -1318,19 +1358,37 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
- w_gl.clear();
- if (utf8) {
- u8_u16(w_gl, gl);
-- re = ngram(2, w_word, w_gl, NGRAM_ANY_MISMATCH + low + NGRAM_WEIGHTED) +
-- ngram(2, w_gl, w_word, NGRAM_ANY_MISMATCH + low + NGRAM_WEIGHTED);
-+ //w_gl is lowercase already at this point
-+ re = ngram(2, w_word, w_gl, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED);
-+ if (low) {
-+ w_f = w_word;
-+ // lowering dictionary word
-+ mkallsmall_utf(w_f, langnum);
-+ re += ngram(2, w_gl, w_f, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED);
-+ } else {
-+ re += ngram(2, w_gl, w_word, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED);
-+ }
- } else {
-- re = ngram(2, word, gl, NGRAM_ANY_MISMATCH + low + NGRAM_WEIGHTED) +
-- ngram(2, gl, word, NGRAM_ANY_MISMATCH + low + NGRAM_WEIGHTED);
-+ //gl is lowercase already at this point
-+ re = ngram(2, word, gl, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED);
-+ if (low) {
-+ f = word;
-+ // lowering dictionary word
-+ mkallsmall(f, csconv);
-+ re += ngram(2, gl, f, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED);
-+ } else {
-+ re += ngram(2, gl, word, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED);
-+ }
- }
-
- int ngram_score, leftcommon_score;
- if (utf8) {
-- ngram_score = ngram(4, w_word, w_gl, NGRAM_ANY_MISMATCH + low);
-+ //w_gl is lowercase already at this point
-+ ngram_score = ngram(4, w_word, w_gl, NGRAM_ANY_MISMATCH);
- leftcommon_score = leftcommonsubstring(w_word, w_gl);
- } else {
-- ngram_score = ngram(4, word, gl, NGRAM_ANY_MISMATCH + low);
-+ //gl is lowercase already at this point
-+ ngram_score = ngram(4, word, gl, NGRAM_ANY_MISMATCH);
- leftcommon_score = leftcommonsubstring(word, gl.c_str());
- }
- gscore[i] =
-@@ -1802,14 +1860,6 @@ int SuggestMgr::ngram(int n,
- l2 = su2.size();
- if (l2 == 0)
- return 0;
-- // lowering dictionary word
-- const std::vector<w_char>* p_su2 = &su2;
-- std::vector<w_char> su2_copy;
-- if (opt & NGRAM_LOWERING) {
-- su2_copy = su2;
-- mkallsmall_utf(su2_copy, langnum);
-- p_su2 = &su2_copy;
-- }
- for (int j = 1; j <= n; j++) {
- ns = 0;
- for (int i = 0; i <= (l1 - j); i++) {
-@@ -1817,7 +1867,7 @@ int SuggestMgr::ngram(int n,
- for (int l = 0; l <= (l2 - j); l++) {
- for (k = 0; k < j; k++) {
- const w_char& c1 = su1[i + k];
-- const w_char& c2 = (*p_su2)[l + k];
-+ const w_char& c2 = su2[l + k];
- if ((c1.l != c2.l) || (c1.h != c2.h))
- break;
- }
-@@ -1862,14 +1912,11 @@ int SuggestMgr::ngram(int n,
- if (l2 == 0)
- return 0;
- l1 = s1.size();
-- std::string t(s2);
-- if (opt & NGRAM_LOWERING)
-- mkallsmall(t, csconv);
- for (int j = 1; j <= n; j++) {
- ns = 0;
- for (int i = 0; i <= (l1 - j); i++) {
-- //t is haystack, s1[i..i+j) is needle
-- if (t.find(s1.c_str()+i, 0, j) != std::string::npos) {
-+ //s2 is haystack, s1[i..i+j) is needle
-+ if (s2.find(s1.c_str()+i, 0, j) != std::string::npos) {
- ns++;
- } else if (opt & NGRAM_WEIGHTED) {
- ns--;
---
-2.9.3
-
diff --git a/external/hunspell/0004-either-clear-will-be-called-anyway-before-use-or-its.patch b/external/hunspell/0004-either-clear-will-be-called-anyway-before-use-or-its.patch
deleted file mode 100644
index 6c8a108d6719..000000000000
--- a/external/hunspell/0004-either-clear-will-be-called-anyway-before-use-or-its.patch
+++ /dev/null
@@ -1,81 +0,0 @@
-From aab258adbd9c78931a36b96e58975a08000249a8 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm at redhat.com>
-Date: Fri, 10 Feb 2017 17:14:35 +0000
-Subject: [PATCH 4/4] either clear will be called anyway before use, or its
- unused afterwards
-
----
- src/hunspell/suggestmgr.cxx | 8 --------
- 1 file changed, 8 deletions(-)
-
-diff --git a/src/hunspell/suggestmgr.cxx b/src/hunspell/suggestmgr.cxx
-index ea52707..ae34535 100644
---- a/src/hunspell/suggestmgr.cxx
-+++ b/src/hunspell/suggestmgr.cxx
-@@ -1089,7 +1089,6 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
- continue;
-
- if (utf8) {
-- w_f.clear();
- u8_u16(w_f, HENTRY_WORD(hp));
-
- int leftcommon = leftcommonsubstring(w_word, w_f);
-@@ -1115,7 +1114,6 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
- copy_field(f, HENTRY_DATA(hp), MORPH_PHON)) {
- int sc2;
- if (utf8) {
-- w_f.clear();
- u8_u16(w_f, f);
-
- int leftcommon = leftcommonsubstring(w_word, w_f);
-@@ -1139,7 +1137,6 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
- int scphon = -20000;
- if (ph && (sc > 2) && (abs(n - (int)hp->clen) <= 3)) {
- if (utf8) {
-- w_candidate.clear();
- u8_u16(w_candidate, HENTRY_WORD(hp));
- mkallcap_utf(w_candidate, langnum);
- u16_u8(candidate, w_candidate);
-@@ -1148,7 +1145,6 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
- mkallcap(candidate, csconv);
- }
- f = phonet(candidate, *ph);
-- w_f.clear();
- if (utf8) {
- u8_u16(w_f, f);
- scphon = 2 * ngram(3, w_target, w_f,
-@@ -1254,7 +1250,6 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
-
- for (int k = 0; k < nw; k++) {
- if (utf8) {
-- w_f.clear();
- u8_u16(w_f, glst[k].word);
-
- int leftcommon = leftcommonsubstring(w_word, w_f);
-@@ -1335,7 +1330,6 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
- std::string gl;
- int len;
- if (utf8) {
-- w_gl.clear();
- len = u8_u16(w_gl, guess[i]);
- mkallsmall_utf(w_gl, langnum);
- u16_u8(gl, w_gl);
-@@ -1355,7 +1349,6 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
- }
- // using 2-gram instead of 3, and other weightening
-
-- w_gl.clear();
- if (utf8) {
- u8_u16(w_gl, gl);
- //w_gl is lowercase already at this point
-@@ -1421,7 +1414,6 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
- // lowering rootphon[i]
- std::string gl;
- int len;
-- w_gl.clear();
- if (utf8) {
- len = u8_u16(w_gl, rootsphon[i]);
- mkallsmall_utf(w_gl, langnum);
---
-2.9.3
-
diff --git a/external/hunspell/0005-fix-syllable-counting-in-compound-word-handling.patch b/external/hunspell/0005-fix-syllable-counting-in-compound-word-handling.patch
deleted file mode 100644
index 670d938e5441..000000000000
--- a/external/hunspell/0005-fix-syllable-counting-in-compound-word-handling.patch
+++ /dev/null
@@ -1,66 +0,0 @@
-From f4ec6a283f972c82d068f4472320d424c40d45cb Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?L=C3=A1szl=C3=B3=20N=C3=A9meth?=
- <laszlo.nemeth at collabora.com>
-Date: Thu, 23 Mar 2017 16:40:52 +0100
-Subject: [PATCH 5/7] fix syllable counting in compound word handling
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Note: one of the fixed regressions is related to an old
-hidden mistake: using clen instead of blen of the stem
-word lengths was indifferent with the original get_syllable(),
-because blen == clen at 8-bit encodings, and UTF-8
-words were handled by null-termination. Implementing Unicode
-support in Hunspell, clen was changed only in
-compound_check_morph() to blen accidentally, but not
-in compound_check(), resulting problems from the
-recent std::string conversion.
-
-Now this commit is a real fix for the regression from the
-commit c63c93237e4decdba5544a96093448605ac549c2,
-instead of the following bad fix:
-
-commit d06b0c57ae87ee8743f1bf53f80c1f8e364db619
-Author: László Németh <laszlo.nemeth at collabora.com>
-Date: Fri Mar 17 15:11:23 2017 +0100
-
- fix Hungarian compound word handling
----
- src/hunspell/affixmgr.cxx | 6 +++---
- 1 file changed, 3 insertions(+), 3 deletions(-)
-
-diff --git a/src/hunspell/affixmgr.cxx b/src/hunspell/affixmgr.cxx
-index 2ed8233..3d65539 100644
---- a/src/hunspell/affixmgr.cxx
-+++ b/src/hunspell/affixmgr.cxx
-@@ -1816,7 +1816,7 @@ struct hentry* AffixMgr::compound_check(const std::string& word,
- // LANG_hu section: spec. Hungarian rule
- if (langnum == LANG_hu) {
- // calculate syllable number of the word
-- numsyllable += get_syllable(st.substr(i));
-+ numsyllable += get_syllable(st.substr(0, i));
- // + 1 word, if syllable number of the prefix > 1 (hungarian
- // convention)
- if (pfx && (get_syllable(pfx->getKey()) > 1))
-@@ -1901,7 +1901,7 @@ struct hentry* AffixMgr::compound_check(const std::string& word,
- (compoundend && TESTAFF(rv->astr, compoundend, rv->alen))) &&
- (((cpdwordmax == -1) || (wordnum + 1 < cpdwordmax)) ||
- ((cpdmaxsyllable != 0) &&
-- (numsyllable + get_syllable(std::string(HENTRY_WORD(rv), rv->clen)) <=
-+ (numsyllable + get_syllable(std::string(HENTRY_WORD(rv), rv->blen)) <=
- cpdmaxsyllable))) &&
- (
- // test CHECKCOMPOUNDPATTERN
-@@ -2382,7 +2382,7 @@ int AffixMgr::compound_check_morph(const char* word,
- // LANG_hu section: spec. Hungarian rule
- if (langnum == LANG_hu) {
- // calculate syllable number of the word
-- numsyllable += get_syllable(st.substr(i));
-+ numsyllable += get_syllable(st.substr(0, i));
-
- // + 1 word, if syllable number of the prefix > 1 (hungarian
- // convention)
---
-2.7.4
-
diff --git a/external/hunspell/UnpackedTarball_hunspell.mk b/external/hunspell/UnpackedTarball_hunspell.mk
index 40a4a101a8f0..3bb7e5e42dc7 100644
--- a/external/hunspell/UnpackedTarball_hunspell.mk
+++ b/external/hunspell/UnpackedTarball_hunspell.mk
@@ -21,15 +21,6 @@ $(eval $(call gb_UnpackedTarball_set_patchlevel,hunspell,1))
$(eval $(call gb_UnpackedTarball_add_patches,hunspell, \
external/hunspell/0001-Revert-Remove-autotools-autogenerated-files.patch \
- external/hunspell/0001-unroll-this-a-bit.patch \
- external/hunspell/0001-cppcheck-redundant-c_str.patch \
- external/hunspell/0001-cppcheck-rv-is-reassigned-before-old-value-used.patch \
- external/hunspell/0001-loop-via-iterators.patch \
- external/hunspell/0002-add-a-get_clen_and_captype-varient-that-takes-a-buff.patch \
- external/hunspell/0003-hoist-string-lowering-from-ngram-to-ngsuggest.patch \
- external/hunspell/0004-either-clear-will-be-called-anyway-before-use-or-its.patch \
- external/hunspell/0002-fix-other-regression-in-compounding.patch \
- external/hunspell/0005-fix-syllable-counting-in-compound-word-handling.patch \
))
# vim: set noet sw=4 ts=4:
More information about the Libreoffice-commits
mailing list