[Libreoffice-commits] core.git: download.lst external/hunspell

Caolán McNamara caolanm at redhat.com
Thu Sep 21 12:31:11 UTC 2017


 download.lst                                                                      |    4 
 external/hunspell/0001-cppcheck-redundant-c_str.patch                             |   34 -
 external/hunspell/0001-cppcheck-rv-is-reassigned-before-old-value-used.patch      |   57 --
 external/hunspell/0001-loop-via-iterators.patch                                   |   36 -
 external/hunspell/0001-unroll-this-a-bit.patch                                    |  116 ----
 external/hunspell/0002-add-a-get_clen_and_captype-varient-that-takes-a-buff.patch |   78 --
 external/hunspell/0002-fix-other-regression-in-compounding.patch                  |   43 -
 external/hunspell/0003-hoist-string-lowering-from-ngram-to-ngsuggest.patch        |  264 ----------
 external/hunspell/0004-either-clear-will-be-called-anyway-before-use-or-its.patch |   81 ---
 external/hunspell/0005-fix-syllable-counting-in-compound-word-handling.patch      |   66 --
 external/hunspell/UnpackedTarball_hunspell.mk                                     |    9 
 11 files changed, 2 insertions(+), 786 deletions(-)

New commits:
commit 917bcea4b4660c516bb18691e4f8ee60313804ef
Author: Caolán McNamara <caolanm at redhat.com>
Date:   Wed Sep 20 16:40:39 2017 +0100

    bump hunspell to 1.6.2
    
    Change-Id: I91d4d58f2b8ba69067de1d08476a8cebbb780535
    Reviewed-on: https://gerrit.libreoffice.org/42555
    Tested-by: Jenkins <ci at libreoffice.org>
    Reviewed-by: Caolán McNamara <caolanm at redhat.com>
    Tested-by: Caolán McNamara <caolanm at redhat.com>

diff --git a/download.lst b/download.lst
index dd89c4a73008..08fe31d337e5 100644
--- a/download.lst
+++ b/download.lst
@@ -87,8 +87,8 @@ export HARFBUZZ_SHA256SUM := ccec4930ff0bb2d0c40aee203075447954b64a8c2695202413c
 export HARFBUZZ_TARBALL := harfbuzz-1.4.8.tar.bz2
 export HSQLDB_SHA256SUM := d30b13f4ba2e3b6a2d4f020c0dee0a9fb9fc6fbcc2d561f36b78da4bf3802370
 export HSQLDB_TARBALL := 17410483b5b5f267aa18b7e00b65e6e0-hsqldb_1_8_0.zip
-export HUNSPELL_SHA256SUM := 512e7d2ee69dad0b35ca011076405e56e0f10963a02d4859dbcc4faf53ca68e2
-export HUNSPELL_TARBALL := 047c3feb121261b76dc16cdb62f54483-hunspell-1.6.0.tar.gz
+export HUNSPELL_SHA256SUM := 3cd9ceb062fe5814f668e4f22b2fa6e3ba0b339b921739541ce180cac4d6f4c4
+export HUNSPELL_TARBALL := hunspell-1.6.2.tar.gz
 export HYPHEN_SHA256SUM := 304636d4eccd81a14b6914d07b84c79ebb815288c76fe027b9ebff6ff24d5705
 export HYPHEN_TARBALL := 5ade6ae2a99bc1e9e57031ca88d36dad-hyphen-2.8.8.tar.gz
 export ICU_SHA256SUM := 7132fdaf9379429d004005217f10e00b7d2319d0fea22bdfddef8991c45b75fe
diff --git a/external/hunspell/0001-cppcheck-redundant-c_str.patch b/external/hunspell/0001-cppcheck-redundant-c_str.patch
deleted file mode 100644
index 276ddd2e705b..000000000000
--- a/external/hunspell/0001-cppcheck-redundant-c_str.patch
+++ /dev/null
@@ -1,34 +0,0 @@
-From 9a0baf202f67291eaf482f1bcf654e21d71943e2 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm at redhat.com>
-Date: Mon, 23 Jan 2017 11:43:53 +0000
-Subject: [PATCH] cppcheck: redundant c_str
-
----
- src/hunspell/suggestmgr.cxx | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/src/hunspell/suggestmgr.cxx b/src/hunspell/suggestmgr.cxx
-index b998341..8d46dd6 100644
---- a/src/hunspell/suggestmgr.cxx
-+++ b/src/hunspell/suggestmgr.cxx
-@@ -1107,7 +1107,7 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
-         int sc2;
-         if (utf8) {
-           w_f.clear();
--          u8_u16(w_f, f.c_str());
-+          u8_u16(w_f, f);
-           sc2 = ngram(3, w_word, w_f, NGRAM_LONGER_WORSE + low) +
-                 leftcommonsubstring(w_word, w_f);
-         } else {
-@@ -1132,7 +1132,7 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
-         std::string target2 = phonet(candidate, *ph);
-         w_target2.clear();
-         if (utf8) {
--          u8_u16(w_target2, target2.c_str());
-+          u8_u16(w_target2, target2);
-           scphon = 2 * ngram(3, w_target, w_target2,
-                              NGRAM_LONGER_WORSE);
-         } else {
--- 
-2.9.3
-
diff --git a/external/hunspell/0001-cppcheck-rv-is-reassigned-before-old-value-used.patch b/external/hunspell/0001-cppcheck-rv-is-reassigned-before-old-value-used.patch
deleted file mode 100644
index bfcdf490a0a9..000000000000
--- a/external/hunspell/0001-cppcheck-rv-is-reassigned-before-old-value-used.patch
+++ /dev/null
@@ -1,57 +0,0 @@
-From 93156ba9a8e644f8b0b724880668714adcb0d094 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm at redhat.com>
-Date: Mon, 23 Jan 2017 12:05:07 +0000
-Subject: [PATCH] cppcheck: rv is reassigned before old value used
-
----
- src/hunspell/affixmgr.cxx   | 6 ++----
- src/hunspell/suggestmgr.cxx | 3 +--
- 2 files changed, 3 insertions(+), 6 deletions(-)
-
-diff --git a/src/hunspell/affixmgr.cxx b/src/hunspell/affixmgr.cxx
-index 680cbe9..21cf384 100644
---- a/src/hunspell/affixmgr.cxx
-+++ b/src/hunspell/affixmgr.cxx
-@@ -1494,9 +1494,8 @@ int AffixMgr::defcpd_check(hentry*** words,
- }
- 
- inline int AffixMgr::candidate_check(const char* word, int len) {
--  struct hentry* rv = NULL;
- 
--  rv = lookup(word);
-+  struct hentry* rv = lookup(word);
-   if (rv)
-     return 1;
- 
-@@ -3045,10 +3044,9 @@ struct hentry* AffixMgr::affix_check(const char* word,
-                                      int len,
-                                      const FLAG needflag,
-                                      char in_compound) {
--  struct hentry* rv = NULL;
- 
-   // check all prefixes (also crossed with suffixes if allowed)
--  rv = prefix_check(word, len, in_compound, needflag);
-+  struct hentry* rv = prefix_check(word, len, in_compound, needflag);
-   if (rv)
-     return rv;
- 
-diff --git a/src/hunspell/suggestmgr.cxx b/src/hunspell/suggestmgr.cxx
-index 8d46dd6..54a474f 100644
---- a/src/hunspell/suggestmgr.cxx
-+++ b/src/hunspell/suggestmgr.cxx
-@@ -1675,11 +1675,10 @@ std::string SuggestMgr::suggest_hentry_gen(hentry* rv, const char* pattern) {
-   if (HENTRY_DATA(rv))
-     p = (char*)strstr(HENTRY_DATA2(rv), MORPH_ALLOMORPH);
-   while (p) {
--    struct hentry* rv2 = NULL;
-     p += MORPH_TAG_LEN;
-     int plen = fieldlen(p);
-     std::string allomorph(p, plen);
--    rv2 = pAMgr->lookup(allomorph.c_str());
-+    struct hentry* rv2 = pAMgr->lookup(allomorph.c_str());
-     while (rv2) {
-       //            if (HENTRY_DATA(rv2) && get_sfxcount(HENTRY_DATA(rv2)) <=
-       //            sfxcount) {
--- 
-2.9.3
-
diff --git a/external/hunspell/0001-loop-via-iterators.patch b/external/hunspell/0001-loop-via-iterators.patch
deleted file mode 100644
index 6ecdd769e3bf..000000000000
--- a/external/hunspell/0001-loop-via-iterators.patch
+++ /dev/null
@@ -1,36 +0,0 @@
-From f366e97fa8d7ad21060033b733dda15299edf7c5 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm at redhat.com>
-Date: Fri, 10 Feb 2017 15:37:11 +0000
-Subject: [PATCH 1/4] loop via iterators
-
----
- src/hunspell/csutil.cxx | 8 ++++++--
- 1 file changed, 6 insertions(+), 2 deletions(-)
-
-diff --git a/src/hunspell/csutil.cxx b/src/hunspell/csutil.cxx
-index c1666a5..2408677 100644
---- a/src/hunspell/csutil.cxx
-+++ b/src/hunspell/csutil.cxx
-@@ -2537,13 +2537,17 @@ int get_captype_utf8(const std::vector<w_char>& word, int langnum) {
-   size_t ncap = 0;
-   size_t nneutral = 0;
-   size_t firstcap = 0;
--  for (size_t i = 0; i < word.size(); ++i) {
--    unsigned short idx = (word[i].h << 8) + word[i].l;
-+
-+  std::vector<w_char>::const_iterator it = word.begin();
-+  std::vector<w_char>::const_iterator it_end = word.end();
-+  while (it != it_end) {
-+    unsigned short idx = (it->h << 8) + it->l;
-     unsigned short lwridx = unicodetolower(idx, langnum);
-     if (idx != lwridx)
-       ncap++;
-     if (unicodetoupper(idx, langnum) == lwridx)
-       nneutral++;
-+    ++it;
-   }
-   if (ncap) {
-     unsigned short idx = (word[0].h << 8) + word[0].l;
--- 
-2.9.3
-
diff --git a/external/hunspell/0001-unroll-this-a-bit.patch b/external/hunspell/0001-unroll-this-a-bit.patch
deleted file mode 100644
index 607a51a5fd1b..000000000000
--- a/external/hunspell/0001-unroll-this-a-bit.patch
+++ /dev/null
@@ -1,116 +0,0 @@
-From bf05e232805f6c1fae5dea3c223de8bdaab425e9 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm at redhat.com>
-Date: Mon, 23 Jan 2017 13:26:53 +0000
-Subject: [PATCH 1/3] unroll this a bit
-
----
- src/hunspell/csutil.cxx | 49 ++++++++++++++++++++++++++++---------------------
- 1 file changed, 28 insertions(+), 21 deletions(-)
-
-diff --git a/src/hunspell/csutil.cxx b/src/hunspell/csutil.cxx
-index ac5cd98..c1666a5 100644
---- a/src/hunspell/csutil.cxx
-+++ b/src/hunspell/csutil.cxx
-@@ -518,18 +518,20 @@ unsigned char ccase(const struct cs_info* csconv, int nIndex) {
- 
- w_char upper_utf(w_char u, int langnum) {
-   unsigned short idx = (u.h << 8) + u.l;
--  if (idx != unicodetoupper(idx, langnum)) {
--    u.h = (unsigned char)(unicodetoupper(idx, langnum) >> 8);
--    u.l = (unsigned char)(unicodetoupper(idx, langnum) & 0x00FF);
-+  unsigned short upridx = unicodetoupper(idx, langnum);
-+  if (idx != upridx) {
-+    u.h = (unsigned char)(upridx >> 8);
-+    u.l = (unsigned char)(upridx & 0x00FF);
-   }
-   return u;
- }
- 
- w_char lower_utf(w_char u, int langnum) {
-   unsigned short idx = (u.h << 8) + u.l;
--  if (idx != unicodetolower(idx, langnum)) {
--    u.h = (unsigned char)(unicodetolower(idx, langnum) >> 8);
--    u.l = (unsigned char)(unicodetolower(idx, langnum) & 0x00FF);
-+  unsigned short lwridx = unicodetolower(idx, langnum);
-+  if (idx != lwridx) {
-+    u.h = (unsigned char)(lwridx >> 8);
-+    u.l = (unsigned char)(lwridx & 0x00FF);
-   }
-   return u;
- }
-@@ -551,12 +553,13 @@ std::string& mkallsmall(std::string& s, const struct cs_info* csconv) {
- }
- 
- std::vector<w_char>& mkallsmall_utf(std::vector<w_char>& u,
--                                    int langnum) {
-+                                          int langnum) {
-   for (size_t i = 0; i < u.size(); ++i) {
-     unsigned short idx = (u[i].h << 8) + u[i].l;
--    if (idx != unicodetolower(idx, langnum)) {
--      u[i].h = (unsigned char)(unicodetolower(idx, langnum) >> 8);
--      u[i].l = (unsigned char)(unicodetolower(idx, langnum) & 0x00FF);
-+    unsigned short lwridx = unicodetolower(idx, langnum);
-+    if (idx != lwridx) {
-+      u[i].h = (unsigned char)(lwridx >> 8);
-+      u[i].l = (unsigned char)(lwridx & 0x00FF);
-     }
-   }
-   return u;
-@@ -565,9 +568,10 @@ std::vector<w_char>& mkallsmall_utf(std::vector<w_char>& u,
- std::vector<w_char>& mkallcap_utf(std::vector<w_char>& u, int langnum) {
-   for (size_t i = 0; i < u.size(); i++) {
-     unsigned short idx = (u[i].h << 8) + u[i].l;
--    if (idx != unicodetoupper(idx, langnum)) {
--      u[i].h = (unsigned char)(unicodetoupper(idx, langnum) >> 8);
--      u[i].l = (unsigned char)(unicodetoupper(idx, langnum) & 0x00FF);
-+    unsigned short upridx = unicodetoupper(idx, langnum);
-+    if (idx != upridx) {
-+      u[i].h = (unsigned char)(upridx >> 8);
-+      u[i].l = (unsigned char)(upridx & 0x00FF);
-     }
-   }
-   return u;
-@@ -583,9 +587,10 @@ std::string& mkinitcap(std::string& s, const struct cs_info* csconv) {
- std::vector<w_char>& mkinitcap_utf(std::vector<w_char>& u, int langnum) {
-   if (!u.empty()) {
-     unsigned short idx = (u[0].h << 8) + u[0].l;
--    if (idx != unicodetoupper(idx, langnum)) {
--      u[0].h = (unsigned char)(unicodetoupper(idx, langnum) >> 8);
--      u[0].l = (unsigned char)(unicodetoupper(idx, langnum) & 0x00FF);
-+    unsigned short upridx = unicodetoupper(idx, langnum);
-+    if (idx != upridx) {
-+      u[0].h = (unsigned char)(upridx >> 8);
-+      u[0].l = (unsigned char)(upridx & 0x00FF);
-     }
-   }
-   return u;
-@@ -601,9 +606,10 @@ std::string& mkinitsmall(std::string& s, const struct cs_info* csconv) {
- std::vector<w_char>& mkinitsmall_utf(std::vector<w_char>& u, int langnum) {
-   if (!u.empty()) {
-     unsigned short idx = (u[0].h << 8) + u[0].l;
--    if (idx != unicodetolower(idx, langnum)) {
--      u[0].h = (unsigned char)(unicodetolower(idx, langnum) >> 8);
--      u[0].l = (unsigned char)(unicodetolower(idx, langnum) & 0x00FF);
-+    unsigned short lwridx = unicodetolower(idx, langnum);
-+    if (idx != lwridx) {
-+      u[0].h = (unsigned char)(lwridx >> 8);
-+      u[0].l = (unsigned char)(lwridx & 0x00FF);
-     }
-   }
-   return u;
-@@ -2533,9 +2539,10 @@ int get_captype_utf8(const std::vector<w_char>& word, int langnum) {
-   size_t firstcap = 0;
-   for (size_t i = 0; i < word.size(); ++i) {
-     unsigned short idx = (word[i].h << 8) + word[i].l;
--    if (idx != unicodetolower(idx, langnum))
-+    unsigned short lwridx = unicodetolower(idx, langnum);
-+    if (idx != lwridx)
-       ncap++;
--    if (unicodetoupper(idx, langnum) == unicodetolower(idx, langnum))
-+    if (unicodetoupper(idx, langnum) == lwridx)
-       nneutral++;
-   }
-   if (ncap) {
--- 
-2.9.3
-
diff --git a/external/hunspell/0002-add-a-get_clen_and_captype-varient-that-takes-a-buff.patch b/external/hunspell/0002-add-a-get_clen_and_captype-varient-that-takes-a-buff.patch
deleted file mode 100644
index 88695ec027d5..000000000000
--- a/external/hunspell/0002-add-a-get_clen_and_captype-varient-that-takes-a-buff.patch
+++ /dev/null
@@ -1,78 +0,0 @@
-From 8e957585671c76fa21e6265ec7b68aa19507f4fe Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm at redhat.com>
-Date: Fri, 10 Feb 2017 15:49:17 +0000
-Subject: [PATCH 2/4] add a get_clen_and_captype varient that takes a buffer
-
-kcachegrind reports 1,057,506,901 -> 830,529,143 on
-
-echo Hollo | valgrind --tool=callgrind ./src/tools/.libs/hunspell -d nl_NL
----
- src/hunspell/hashmgr.cxx | 16 +++++++++++-----
- src/hunspell/hashmgr.hxx |  1 +
- 2 files changed, 12 insertions(+), 5 deletions(-)
-
-diff --git a/src/hunspell/hashmgr.cxx b/src/hunspell/hashmgr.cxx
-index 1de1690..4844b49 100644
---- a/src/hunspell/hashmgr.cxx
-+++ b/src/hunspell/hashmgr.cxx
-@@ -363,12 +363,11 @@ int HashMgr::add_hidden_capitalized_word(const std::string& word,
- }
- 
- // detect captype and modify word length for UTF-8 encoding
--int HashMgr::get_clen_and_captype(const std::string& word, int* captype) {
-+int HashMgr::get_clen_and_captype(const std::string& word, int* captype, std::vector<w_char> &workbuf) {
-   int len;
-   if (utf8) {
--    std::vector<w_char> dest_utf;
--    len = u8_u16(dest_utf, word);
--    *captype = get_captype_utf8(dest_utf, langnum);
-+    len = u8_u16(workbuf, word);
-+    *captype = get_captype_utf8(workbuf, langnum);
-   } else {
-     len = word.size();
-     *captype = get_captype(word, csconv);
-@@ -376,6 +375,11 @@ int HashMgr::get_clen_and_captype(const std::string& word, int* captype) {
-   return len;
- }
- 
-+int HashMgr::get_clen_and_captype(const std::string& word, int* captype) {
-+  std::vector<w_char> workbuf;
-+  return get_clen_and_captype(word, captype, workbuf);
-+}
-+
- // remove word (personal dictionary function for standalone applications)
- int HashMgr::remove(const std::string& word) {
-   struct hentry* dp = lookup(word.c_str());
-@@ -527,6 +531,8 @@ int HashMgr::load_tables(const char* tpath, const char* key) {
-   // loop through all words on much list and add to hash
-   // table and create word and affix strings
- 
-+  std::vector<w_char> workbuf;
-+
-   while (dict->getline(ts)) {
-     mychomp(ts);
-     // split each line into word and morphological description
-@@ -599,7 +605,7 @@ int HashMgr::load_tables(const char* tpath, const char* key) {
-     }
- 
-     int captype;
--    int wcl = get_clen_and_captype(ts, &captype);
-+    int wcl = get_clen_and_captype(ts, &captype, workbuf);
-     const std::string *dp_str = dp.empty() ? NULL : &dp;
-     // add the word and its index plus its capitalized form optionally
-     if (add_word(ts, wcl, flags, al, dp_str, false) ||
-diff --git a/src/hunspell/hashmgr.hxx b/src/hunspell/hashmgr.hxx
-index 812171a..5a09c45 100644
---- a/src/hunspell/hashmgr.hxx
-+++ b/src/hunspell/hashmgr.hxx
-@@ -125,6 +125,7 @@ class HashMgr {
- 
-  private:
-   int get_clen_and_captype(const std::string& word, int* captype);
-+  int get_clen_and_captype(const std::string& word, int* captype, std::vector<w_char> &workbuf);
-   int load_tables(const char* tpath, const char* key);
-   int add_word(const std::string& word,
-                int wcl,
--- 
-2.9.3
-
diff --git a/external/hunspell/0002-fix-other-regression-in-compounding.patch b/external/hunspell/0002-fix-other-regression-in-compounding.patch
deleted file mode 100644
index cbf29e54f93a..000000000000
--- a/external/hunspell/0002-fix-other-regression-in-compounding.patch
+++ /dev/null
@@ -1,43 +0,0 @@
-From 1fada01663b29b57c010a9c274e45a5cf9ecf222 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?L=C3=A1szl=C3=B3=20N=C3=A9meth?=
- <laszlo.nemeth at collabora.com>
-Date: Sun, 19 Mar 2017 13:19:29 +0100
-Subject: [PATCH 2/7] fix other regression in compounding
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Allow compound words again with
-starting "kor", "alak", "asszony", "úr"
-related to the "REP kor _kor" etc. rules
-using the Hungarian spelling dictionary.
-
-regression from...
-
-commit 73b1cad1af7ab94252f75784fa6724cf062a6966
-Author: Martin Hosken <martin_hosken at sil.org>
-Date:   Mon Apr 18 16:28:26 2016 +0700
-
-    Add support for bounded conversion
----
- src/hunspell/affixmgr.cxx | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/src/hunspell/affixmgr.cxx b/src/hunspell/affixmgr.cxx
-index 78c70e7..ec2093d 100644
---- a/src/hunspell/affixmgr.cxx
-+++ b/src/hunspell/affixmgr.cxx
-@@ -1290,8 +1290,8 @@ int AffixMgr::cpdrep_check(const char* word, int wl) {
-     // search every occurence of the pattern in the word
-     while ((r = strstr(r, reptable[i].pattern.c_str())) != NULL) {
-       std::string candidate(word);
--      size_t type = r == word ? 1 : 0;
--      if (r - word + reptable[i].pattern.size() == lenp)
-+      size_t type = r == word && langnum != LANG_hu ? 1 : 0;
-+      if (r - word + reptable[i].pattern.size() == lenp && langnum != LANG_hu)
-         type += 2;
-       candidate.replace(r - word, lenp, reptable[i].outstrings[type]);
-       if (candidate_check(candidate.c_str(), candidate.size()))
--- 
-2.7.4
-
diff --git a/external/hunspell/0003-hoist-string-lowering-from-ngram-to-ngsuggest.patch b/external/hunspell/0003-hoist-string-lowering-from-ngram-to-ngsuggest.patch
deleted file mode 100644
index ff2530cfe23d..000000000000
--- a/external/hunspell/0003-hoist-string-lowering-from-ngram-to-ngsuggest.patch
+++ /dev/null
@@ -1,264 +0,0 @@
-From cf0967951a25a2daa10a636092193af5c5497aa2 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm at redhat.com>
-Date: Fri, 10 Feb 2017 16:36:27 +0000
-Subject: [PATCH 3/4] hoist string lowering from ngram to ngsuggest
-
-only lower when we have to and reuse scratch buffers as
-tolower destination
-
-kcachegrind reports 830,529,143 -> 779,887,690 on
-
-echo Hollo | valgrind --tool=callgrind ./src/tools/.libs/hunspell -d nl_NL
----
- src/hunspell/suggestmgr.cxx | 143 +++++++++++++++++++++++++++++---------------
- 1 file changed, 95 insertions(+), 48 deletions(-)
-
-diff --git a/src/hunspell/suggestmgr.cxx b/src/hunspell/suggestmgr.cxx
-index 54a474f..ea52707 100644
---- a/src/hunspell/suggestmgr.cxx
-+++ b/src/hunspell/suggestmgr.cxx
-@@ -1075,10 +1075,8 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
-     u8_u16(w_target, target);
-   }
-   
--  std::vector<w_char> w_entry;
-   std::string f;
-   std::vector<w_char> w_f;
--  std::vector<w_char> w_target2;
-   
-   for (size_t i = 0; i < rHMgr.size(); ++i) {
-     while (0 != (hp = rHMgr[i]->walk_hashtable(col, hp))) {
-@@ -1091,13 +1089,24 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
-         continue;
- 
-       if (utf8) {
--        w_entry.clear();
--        u8_u16(w_entry, HENTRY_WORD(hp));
--        sc = ngram(3, w_word, w_entry, NGRAM_LONGER_WORSE + low) +
--             leftcommonsubstring(w_word, w_entry);
-+        w_f.clear();
-+        u8_u16(w_f, HENTRY_WORD(hp));
-+
-+        int leftcommon = leftcommonsubstring(w_word, w_f);
-+        if (low) {
-+          // lowering dictionary word
-+          mkallsmall_utf(w_f, langnum);
-+        }
-+        sc = ngram(3, w_word, w_f, NGRAM_LONGER_WORSE) + leftcommon;
-       } else {
--        sc = ngram(3, word, HENTRY_WORD(hp), NGRAM_LONGER_WORSE + low) +
--             leftcommonsubstring(word, HENTRY_WORD(hp));
-+        f.assign(HENTRY_WORD(hp));
-+
-+        int leftcommon = leftcommonsubstring(word, f.c_str());
-+        if (low) {
-+          // lowering dictionary word
-+          mkallsmall(f, csconv);
-+        }
-+        sc = ngram(3, word, f, NGRAM_LONGER_WORSE) + leftcommon;
-       }
- 
-       // check special pronounciation
-@@ -1108,11 +1117,20 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
-         if (utf8) {
-           w_f.clear();
-           u8_u16(w_f, f);
--          sc2 = ngram(3, w_word, w_f, NGRAM_LONGER_WORSE + low) +
--                leftcommonsubstring(w_word, w_f);
-+
-+          int leftcommon = leftcommonsubstring(w_word, w_f);
-+          if (low) {
-+            // lowering dictionary word
-+            mkallsmall_utf(w_f, langnum);
-+          }
-+          sc2 = ngram(3, w_word, w_f, NGRAM_LONGER_WORSE) + leftcommon;
-         } else {
--          sc2 = ngram(3, word, f, NGRAM_LONGER_WORSE + low) +
--                leftcommonsubstring(word, f.c_str());
-+          int leftcommon = leftcommonsubstring(word, f.c_str());
-+          if (low) {
-+            // lowering dictionary word
-+            mkallsmall(f, csconv);
-+          }
-+          sc2 = ngram(3, word, f, NGRAM_LONGER_WORSE) + leftcommon;
-         }
-         if (sc2 > sc)
-           sc = sc2;
-@@ -1129,14 +1147,14 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
-           candidate = HENTRY_WORD(hp);
-           mkallcap(candidate, csconv);
-         }
--        std::string target2 = phonet(candidate, *ph);
--        w_target2.clear();
-+        f = phonet(candidate, *ph);
-+        w_f.clear();
-         if (utf8) {
--          u8_u16(w_target2, target2);
--          scphon = 2 * ngram(3, w_target, w_target2,
-+          u8_u16(w_f, f);
-+          scphon = 2 * ngram(3, w_target, w_f,
-                              NGRAM_LONGER_WORSE);
-         } else {
--          scphon = 2 * ngram(3, target, target2,
-+          scphon = 2 * ngram(3, target, f,
-                              NGRAM_LONGER_WORSE);
-         }
-       }
-@@ -1177,12 +1195,24 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
-         w_mw[k].l = '*';
-         w_mw[k].h = 0;
-       }
--      thresh += ngram(n, w_word, w_mw, NGRAM_ANY_MISMATCH + low);
-+
-+      if (low) {
-+        // lowering dictionary word
-+        mkallsmall_utf(w_mw, langnum);
-+      }
-+
-+      thresh += ngram(n, w_word, w_mw, NGRAM_ANY_MISMATCH);
-     } else {
-       std::string mw = word;
-       for (int k = sp; k < n; k += 4)
-         mw[k] = '*';
--      thresh += ngram(n, word, mw, NGRAM_ANY_MISMATCH + low);
-+
-+      if (low) {
-+        // lowering dictionary word
-+        mkallsmall(mw, csconv);
-+      }
-+
-+      thresh += ngram(n, word, mw, NGRAM_ANY_MISMATCH);
-     }
-   }
-   thresh = thresh / 3;
-@@ -1210,7 +1240,6 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
-     return;
-   }
- 
--  std::vector<w_char> w_glst_word;
-   for (int i = 0; i < MAX_ROOTS; i++) {
-     if (roots[i]) {
-       struct hentry* rp = roots[i];
-@@ -1225,15 +1254,26 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
- 
-       for (int k = 0; k < nw; k++) {
-         if (utf8) {
--          w_glst_word.clear();
--          u8_u16(w_glst_word, glst[k].word);
--          sc = ngram(n, w_word, w_glst_word,
--                     NGRAM_ANY_MISMATCH + low) +
--               leftcommonsubstring(w_word, w_glst_word);
-+          w_f.clear();
-+          u8_u16(w_f, glst[k].word);
-+
-+          int leftcommon = leftcommonsubstring(w_word, w_f);
-+          if (low) {
-+            // lowering dictionary word
-+            mkallsmall_utf(w_f, langnum);
-+          }
-+
-+          sc = ngram(n, w_word, w_f, NGRAM_ANY_MISMATCH) + leftcommon;
-         } else {
--          sc = ngram(n, word, glst[k].word,
--                     NGRAM_ANY_MISMATCH + low) +
--               leftcommonsubstring(word, glst[k].word);
-+          f = glst[k].word;
-+
-+          int leftcommon = leftcommonsubstring(word, f.c_str());
-+          if (low) {
-+            // lowering dictionary word
-+            mkallsmall(f, csconv);
-+          }
-+
-+          sc = ngram(n, word, f, NGRAM_ANY_MISMATCH) + leftcommon;
-         }
- 
-         if (sc > thresh) {
-@@ -1318,19 +1358,37 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
-       w_gl.clear();
-       if (utf8) {
-         u8_u16(w_gl, gl);
--        re = ngram(2, w_word, w_gl, NGRAM_ANY_MISMATCH + low + NGRAM_WEIGHTED) +
--             ngram(2, w_gl, w_word, NGRAM_ANY_MISMATCH + low + NGRAM_WEIGHTED);
-+        //w_gl is lowercase already at this point
-+        re = ngram(2, w_word, w_gl, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED);
-+        if (low) {
-+          w_f = w_word;
-+          // lowering dictionary word
-+          mkallsmall_utf(w_f, langnum);
-+          re += ngram(2, w_gl, w_f, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED);
-+        } else {
-+          re += ngram(2, w_gl, w_word, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED);
-+        }
-       } else {
--        re = ngram(2, word, gl, NGRAM_ANY_MISMATCH + low + NGRAM_WEIGHTED) +
--             ngram(2, gl, word, NGRAM_ANY_MISMATCH + low + NGRAM_WEIGHTED);
-+        //gl is lowercase already at this point
-+        re = ngram(2, word, gl, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED);
-+        if (low) {
-+          f = word;
-+          // lowering dictionary word
-+          mkallsmall(f, csconv);
-+          re += ngram(2, gl, f, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED);
-+        } else {
-+          re += ngram(2, gl, word, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED);
-+        }
-       }
- 
-       int ngram_score, leftcommon_score;
-       if (utf8) {
--        ngram_score = ngram(4, w_word, w_gl, NGRAM_ANY_MISMATCH + low);
-+        //w_gl is lowercase already at this point
-+        ngram_score = ngram(4, w_word, w_gl, NGRAM_ANY_MISMATCH);
-         leftcommon_score = leftcommonsubstring(w_word, w_gl);
-       } else {
--        ngram_score = ngram(4, word, gl, NGRAM_ANY_MISMATCH + low);
-+        //gl is lowercase already at this point
-+        ngram_score = ngram(4, word, gl, NGRAM_ANY_MISMATCH);
-         leftcommon_score = leftcommonsubstring(word, gl.c_str());
-       }
-       gscore[i] =
-@@ -1802,14 +1860,6 @@ int SuggestMgr::ngram(int n,
-   l2 = su2.size();
-   if (l2 == 0)
-     return 0;
--  // lowering dictionary word
--  const std::vector<w_char>* p_su2 = &su2;
--  std::vector<w_char> su2_copy;
--  if (opt & NGRAM_LOWERING) {
--    su2_copy = su2;
--    mkallsmall_utf(su2_copy, langnum);
--    p_su2 = &su2_copy;
--  }
-   for (int j = 1; j <= n; j++) {
-     ns = 0;
-     for (int i = 0; i <= (l1 - j); i++) {
-@@ -1817,7 +1867,7 @@ int SuggestMgr::ngram(int n,
-       for (int l = 0; l <= (l2 - j); l++) {
-         for (k = 0; k < j; k++) {
-           const w_char& c1 = su1[i + k];
--          const w_char& c2 = (*p_su2)[l + k];
-+          const w_char& c2 = su2[l + k];
-           if ((c1.l != c2.l) || (c1.h != c2.h))
-             break;
-         }
-@@ -1862,14 +1912,11 @@ int SuggestMgr::ngram(int n,
-   if (l2 == 0)
-     return 0;
-   l1 = s1.size();
--  std::string t(s2);
--  if (opt & NGRAM_LOWERING)
--    mkallsmall(t, csconv);
-   for (int j = 1; j <= n; j++) {
-     ns = 0;
-     for (int i = 0; i <= (l1 - j); i++) {
--      //t is haystack, s1[i..i+j) is needle
--      if (t.find(s1.c_str()+i, 0, j) != std::string::npos) {
-+      //s2 is haystack, s1[i..i+j) is needle
-+      if (s2.find(s1.c_str()+i, 0, j) != std::string::npos) {
-         ns++;
-       } else if (opt & NGRAM_WEIGHTED) {
-         ns--;
--- 
-2.9.3
-
diff --git a/external/hunspell/0004-either-clear-will-be-called-anyway-before-use-or-its.patch b/external/hunspell/0004-either-clear-will-be-called-anyway-before-use-or-its.patch
deleted file mode 100644
index 6c8a108d6719..000000000000
--- a/external/hunspell/0004-either-clear-will-be-called-anyway-before-use-or-its.patch
+++ /dev/null
@@ -1,81 +0,0 @@
-From aab258adbd9c78931a36b96e58975a08000249a8 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm at redhat.com>
-Date: Fri, 10 Feb 2017 17:14:35 +0000
-Subject: [PATCH 4/4] either clear will be called anyway before use, or its
- unused afterwards
-
----
- src/hunspell/suggestmgr.cxx | 8 --------
- 1 file changed, 8 deletions(-)
-
-diff --git a/src/hunspell/suggestmgr.cxx b/src/hunspell/suggestmgr.cxx
-index ea52707..ae34535 100644
---- a/src/hunspell/suggestmgr.cxx
-+++ b/src/hunspell/suggestmgr.cxx
-@@ -1089,7 +1089,6 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
-         continue;
- 
-       if (utf8) {
--        w_f.clear();
-         u8_u16(w_f, HENTRY_WORD(hp));
- 
-         int leftcommon = leftcommonsubstring(w_word, w_f);
-@@ -1115,7 +1114,6 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
-           copy_field(f, HENTRY_DATA(hp), MORPH_PHON)) {
-         int sc2;
-         if (utf8) {
--          w_f.clear();
-           u8_u16(w_f, f);
- 
-           int leftcommon = leftcommonsubstring(w_word, w_f);
-@@ -1139,7 +1137,6 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
-       int scphon = -20000;
-       if (ph && (sc > 2) && (abs(n - (int)hp->clen) <= 3)) {
-         if (utf8) {
--          w_candidate.clear();
-           u8_u16(w_candidate, HENTRY_WORD(hp));
-           mkallcap_utf(w_candidate, langnum);
-           u16_u8(candidate, w_candidate);
-@@ -1148,7 +1145,6 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
-           mkallcap(candidate, csconv);
-         }
-         f = phonet(candidate, *ph);
--        w_f.clear();
-         if (utf8) {
-           u8_u16(w_f, f);
-           scphon = 2 * ngram(3, w_target, w_f,
-@@ -1254,7 +1250,6 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
- 
-       for (int k = 0; k < nw; k++) {
-         if (utf8) {
--          w_f.clear();
-           u8_u16(w_f, glst[k].word);
- 
-           int leftcommon = leftcommonsubstring(w_word, w_f);
-@@ -1335,7 +1330,6 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
-       std::string gl;
-       int len;
-       if (utf8) {
--        w_gl.clear();
-         len = u8_u16(w_gl, guess[i]);
-         mkallsmall_utf(w_gl, langnum);
-         u16_u8(gl, w_gl);
-@@ -1355,7 +1349,6 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
-       }
-       // using 2-gram instead of 3, and other weightening
- 
--      w_gl.clear();
-       if (utf8) {
-         u8_u16(w_gl, gl);
-         //w_gl is lowercase already at this point
-@@ -1421,7 +1414,6 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
-         // lowering rootphon[i]
-         std::string gl;
-         int len;
--        w_gl.clear();
-         if (utf8) {
-           len = u8_u16(w_gl, rootsphon[i]);
-           mkallsmall_utf(w_gl, langnum);
--- 
-2.9.3
-
diff --git a/external/hunspell/0005-fix-syllable-counting-in-compound-word-handling.patch b/external/hunspell/0005-fix-syllable-counting-in-compound-word-handling.patch
deleted file mode 100644
index 670d938e5441..000000000000
--- a/external/hunspell/0005-fix-syllable-counting-in-compound-word-handling.patch
+++ /dev/null
@@ -1,66 +0,0 @@
-From f4ec6a283f972c82d068f4472320d424c40d45cb Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?L=C3=A1szl=C3=B3=20N=C3=A9meth?=
- <laszlo.nemeth at collabora.com>
-Date: Thu, 23 Mar 2017 16:40:52 +0100
-Subject: [PATCH 5/7] fix syllable counting in compound word handling
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Note: one of the fixed regressions is related to an old
-hidden mistake: using clen instead of blen of the stem
-word lengths was indifferent with the original get_syllable(),
-because blen == clen at 8-bit encodings, and UTF-8
-words were handled by null-termination. Implementing Unicode
-support in Hunspell, clen was changed only in
-compound_check_morph() to blen accidentally, but not
-in compound_check(), resulting problems from the
-recent std::string conversion.
-
-Now this commit is a real fix for the regression from the
-commit c63c93237e4decdba5544a96093448605ac549c2,
-instead of the following bad fix:
-
-commit d06b0c57ae87ee8743f1bf53f80c1f8e364db619
-Author: László Németh <laszlo.nemeth at collabora.com>
-Date:   Fri Mar 17 15:11:23 2017 +0100
-
-    fix Hungarian compound word handling
----
- src/hunspell/affixmgr.cxx | 6 +++---
- 1 file changed, 3 insertions(+), 3 deletions(-)
-
-diff --git a/src/hunspell/affixmgr.cxx b/src/hunspell/affixmgr.cxx
-index 2ed8233..3d65539 100644
---- a/src/hunspell/affixmgr.cxx
-+++ b/src/hunspell/affixmgr.cxx
-@@ -1816,7 +1816,7 @@ struct hentry* AffixMgr::compound_check(const std::string& word,
-           // LANG_hu section: spec. Hungarian rule
-           if (langnum == LANG_hu) {
-             // calculate syllable number of the word
--            numsyllable += get_syllable(st.substr(i));
-+            numsyllable += get_syllable(st.substr(0, i));
-             // + 1 word, if syllable number of the prefix > 1 (hungarian
-             // convention)
-             if (pfx && (get_syllable(pfx->getKey()) > 1))
-@@ -1901,7 +1901,7 @@ struct hentry* AffixMgr::compound_check(const std::string& word,
-                  (compoundend && TESTAFF(rv->astr, compoundend, rv->alen))) &&
-                 (((cpdwordmax == -1) || (wordnum + 1 < cpdwordmax)) ||
-                  ((cpdmaxsyllable != 0) &&
--                  (numsyllable + get_syllable(std::string(HENTRY_WORD(rv), rv->clen)) <=
-+                  (numsyllable + get_syllable(std::string(HENTRY_WORD(rv), rv->blen)) <=
-                    cpdmaxsyllable))) &&
-                 (
-                     // test CHECKCOMPOUNDPATTERN
-@@ -2382,7 +2382,7 @@ int AffixMgr::compound_check_morph(const char* word,
-         // LANG_hu section: spec. Hungarian rule
-         if (langnum == LANG_hu) {
-           // calculate syllable number of the word
--          numsyllable += get_syllable(st.substr(i));
-+          numsyllable += get_syllable(st.substr(0, i));
- 
-           // + 1 word, if syllable number of the prefix > 1 (hungarian
-           // convention)
--- 
-2.7.4
-
diff --git a/external/hunspell/UnpackedTarball_hunspell.mk b/external/hunspell/UnpackedTarball_hunspell.mk
index 40a4a101a8f0..3bb7e5e42dc7 100644
--- a/external/hunspell/UnpackedTarball_hunspell.mk
+++ b/external/hunspell/UnpackedTarball_hunspell.mk
@@ -21,15 +21,6 @@ $(eval $(call gb_UnpackedTarball_set_patchlevel,hunspell,1))
 
 $(eval $(call gb_UnpackedTarball_add_patches,hunspell, \
 	external/hunspell/0001-Revert-Remove-autotools-autogenerated-files.patch \
-	external/hunspell/0001-unroll-this-a-bit.patch \
-	external/hunspell/0001-cppcheck-redundant-c_str.patch \
-	external/hunspell/0001-cppcheck-rv-is-reassigned-before-old-value-used.patch \
-	external/hunspell/0001-loop-via-iterators.patch \
-	external/hunspell/0002-add-a-get_clen_and_captype-varient-that-takes-a-buff.patch \
-	external/hunspell/0003-hoist-string-lowering-from-ngram-to-ngsuggest.patch \
-	external/hunspell/0004-either-clear-will-be-called-anyway-before-use-or-its.patch \
-	external/hunspell/0002-fix-other-regression-in-compounding.patch \
-	external/hunspell/0005-fix-syllable-counting-in-compound-word-handling.patch \
 ))
 
 # vim: set noet sw=4 ts=4:


More information about the Libreoffice-commits mailing list