[Libreoffice-commits] core.git: external/hunspell

Caolán McNamara caolanm at redhat.com
Sun Feb 12 21:18:24 UTC 2017


 external/hunspell/0001-cppcheck-redundant-c_str.patch                             |   34 
 external/hunspell/0001-cppcheck-rv-is-reassigned-before-old-value-used.patch      |   57 
 external/hunspell/0001-loop-via-iterators.patch                                   |   36 
 external/hunspell/0002-add-a-get_clen_and_captype-varient-that-takes-a-buff.patch |   78 
 external/hunspell/0002-rename-std-vector-w_char-to-wide-string.patch              |  912 ----------
 external/hunspell/0003-Related-hunspell-406-use-a-basic_string-w_char-inste.patch |   37 
 external/hunspell/0003-hoist-string-lowering-from-ngram-to-ngsuggest.patch        |  264 ++
 external/hunspell/0004-either-clear-will-be-called-anyway-before-use-or-its.patch |   81 
 external/hunspell/0004-use-a-per-hashmgr-persistent-wide-string-scratch-buf.patch |  117 -
 external/hunspell/0005-use-a-per-hashmgr-persistent-wide-string-scratch-buf.patch |  168 -
 external/hunspell/UnpackedTarball_hunspell.mk                                     |   10 
 11 files changed, 556 insertions(+), 1238 deletions(-)

New commits:
commit 163435fa23fbfc237a7718c9d440a98847e4f626
Author: Caolán McNamara <caolanm at redhat.com>
Date:   Sun Feb 12 17:20:56 2017 +0000

    use alternative optimizations for buffer creation bottleneck
    
    Change-Id: I9f29e8d3e5e97fe403a3e0d7d03c6ac01c7689c4

diff --git a/external/hunspell/0001-cppcheck-redundant-c_str.patch b/external/hunspell/0001-cppcheck-redundant-c_str.patch
new file mode 100644
index 0000000..276ddd2
--- /dev/null
+++ b/external/hunspell/0001-cppcheck-redundant-c_str.patch
@@ -0,0 +1,34 @@
+From 9a0baf202f67291eaf482f1bcf654e21d71943e2 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm at redhat.com>
+Date: Mon, 23 Jan 2017 11:43:53 +0000
+Subject: [PATCH] cppcheck: redundant c_str
+
+---
+ src/hunspell/suggestmgr.cxx | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/src/hunspell/suggestmgr.cxx b/src/hunspell/suggestmgr.cxx
+index b998341..8d46dd6 100644
+--- a/src/hunspell/suggestmgr.cxx
++++ b/src/hunspell/suggestmgr.cxx
+@@ -1107,7 +1107,7 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
+         int sc2;
+         if (utf8) {
+           w_f.clear();
+-          u8_u16(w_f, f.c_str());
++          u8_u16(w_f, f);
+           sc2 = ngram(3, w_word, w_f, NGRAM_LONGER_WORSE + low) +
+                 leftcommonsubstring(w_word, w_f);
+         } else {
+@@ -1132,7 +1132,7 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
+         std::string target2 = phonet(candidate, *ph);
+         w_target2.clear();
+         if (utf8) {
+-          u8_u16(w_target2, target2.c_str());
++          u8_u16(w_target2, target2);
+           scphon = 2 * ngram(3, w_target, w_target2,
+                              NGRAM_LONGER_WORSE);
+         } else {
+-- 
+2.9.3
+
diff --git a/external/hunspell/0001-cppcheck-rv-is-reassigned-before-old-value-used.patch b/external/hunspell/0001-cppcheck-rv-is-reassigned-before-old-value-used.patch
new file mode 100644
index 0000000..bfcdf49
--- /dev/null
+++ b/external/hunspell/0001-cppcheck-rv-is-reassigned-before-old-value-used.patch
@@ -0,0 +1,57 @@
+From 93156ba9a8e644f8b0b724880668714adcb0d094 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm at redhat.com>
+Date: Mon, 23 Jan 2017 12:05:07 +0000
+Subject: [PATCH] cppcheck: rv is reassigned before old value used
+
+---
+ src/hunspell/affixmgr.cxx   | 6 ++----
+ src/hunspell/suggestmgr.cxx | 3 +--
+ 2 files changed, 3 insertions(+), 6 deletions(-)
+
+diff --git a/src/hunspell/affixmgr.cxx b/src/hunspell/affixmgr.cxx
+index 680cbe9..21cf384 100644
+--- a/src/hunspell/affixmgr.cxx
++++ b/src/hunspell/affixmgr.cxx
+@@ -1494,9 +1494,8 @@ int AffixMgr::defcpd_check(hentry*** words,
+ }
+ 
+ inline int AffixMgr::candidate_check(const char* word, int len) {
+-  struct hentry* rv = NULL;
+ 
+-  rv = lookup(word);
++  struct hentry* rv = lookup(word);
+   if (rv)
+     return 1;
+ 
+@@ -3045,10 +3044,9 @@ struct hentry* AffixMgr::affix_check(const char* word,
+                                      int len,
+                                      const FLAG needflag,
+                                      char in_compound) {
+-  struct hentry* rv = NULL;
+ 
+   // check all prefixes (also crossed with suffixes if allowed)
+-  rv = prefix_check(word, len, in_compound, needflag);
++  struct hentry* rv = prefix_check(word, len, in_compound, needflag);
+   if (rv)
+     return rv;
+ 
+diff --git a/src/hunspell/suggestmgr.cxx b/src/hunspell/suggestmgr.cxx
+index 8d46dd6..54a474f 100644
+--- a/src/hunspell/suggestmgr.cxx
++++ b/src/hunspell/suggestmgr.cxx
+@@ -1675,11 +1675,10 @@ std::string SuggestMgr::suggest_hentry_gen(hentry* rv, const char* pattern) {
+   if (HENTRY_DATA(rv))
+     p = (char*)strstr(HENTRY_DATA2(rv), MORPH_ALLOMORPH);
+   while (p) {
+-    struct hentry* rv2 = NULL;
+     p += MORPH_TAG_LEN;
+     int plen = fieldlen(p);
+     std::string allomorph(p, plen);
+-    rv2 = pAMgr->lookup(allomorph.c_str());
++    struct hentry* rv2 = pAMgr->lookup(allomorph.c_str());
+     while (rv2) {
+       //            if (HENTRY_DATA(rv2) && get_sfxcount(HENTRY_DATA(rv2)) <=
+       //            sfxcount) {
+-- 
+2.9.3
+
diff --git a/external/hunspell/0001-loop-via-iterators.patch b/external/hunspell/0001-loop-via-iterators.patch
new file mode 100644
index 0000000..6ecdd76
--- /dev/null
+++ b/external/hunspell/0001-loop-via-iterators.patch
@@ -0,0 +1,36 @@
+From f366e97fa8d7ad21060033b733dda15299edf7c5 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm at redhat.com>
+Date: Fri, 10 Feb 2017 15:37:11 +0000
+Subject: [PATCH 1/4] loop via iterators
+
+---
+ src/hunspell/csutil.cxx | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+diff --git a/src/hunspell/csutil.cxx b/src/hunspell/csutil.cxx
+index c1666a5..2408677 100644
+--- a/src/hunspell/csutil.cxx
++++ b/src/hunspell/csutil.cxx
+@@ -2537,13 +2537,17 @@ int get_captype_utf8(const std::vector<w_char>& word, int langnum) {
+   size_t ncap = 0;
+   size_t nneutral = 0;
+   size_t firstcap = 0;
+-  for (size_t i = 0; i < word.size(); ++i) {
+-    unsigned short idx = (word[i].h << 8) + word[i].l;
++
++  std::vector<w_char>::const_iterator it = word.begin();
++  std::vector<w_char>::const_iterator it_end = word.end();
++  while (it != it_end) {
++    unsigned short idx = (it->h << 8) + it->l;
+     unsigned short lwridx = unicodetolower(idx, langnum);
+     if (idx != lwridx)
+       ncap++;
+     if (unicodetoupper(idx, langnum) == lwridx)
+       nneutral++;
++    ++it;
+   }
+   if (ncap) {
+     unsigned short idx = (word[0].h << 8) + word[0].l;
+-- 
+2.9.3
+
diff --git a/external/hunspell/0002-add-a-get_clen_and_captype-varient-that-takes-a-buff.patch b/external/hunspell/0002-add-a-get_clen_and_captype-varient-that-takes-a-buff.patch
new file mode 100644
index 0000000..88695ec
--- /dev/null
+++ b/external/hunspell/0002-add-a-get_clen_and_captype-varient-that-takes-a-buff.patch
@@ -0,0 +1,78 @@
+From 8e957585671c76fa21e6265ec7b68aa19507f4fe Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm at redhat.com>
+Date: Fri, 10 Feb 2017 15:49:17 +0000
+Subject: [PATCH 2/4] add a get_clen_and_captype varient that takes a buffer
+
+kcachegrind reports 1,057,506,901 -> 830,529,143 on
+
+echo Hollo | valgrind --tool=callgrind ./src/tools/.libs/hunspell -d nl_NL
+---
+ src/hunspell/hashmgr.cxx | 16 +++++++++++-----
+ src/hunspell/hashmgr.hxx |  1 +
+ 2 files changed, 12 insertions(+), 5 deletions(-)
+
+diff --git a/src/hunspell/hashmgr.cxx b/src/hunspell/hashmgr.cxx
+index 1de1690..4844b49 100644
+--- a/src/hunspell/hashmgr.cxx
++++ b/src/hunspell/hashmgr.cxx
+@@ -363,12 +363,11 @@ int HashMgr::add_hidden_capitalized_word(const std::string& word,
+ }
+ 
+ // detect captype and modify word length for UTF-8 encoding
+-int HashMgr::get_clen_and_captype(const std::string& word, int* captype) {
++int HashMgr::get_clen_and_captype(const std::string& word, int* captype, std::vector<w_char> &workbuf) {
+   int len;
+   if (utf8) {
+-    std::vector<w_char> dest_utf;
+-    len = u8_u16(dest_utf, word);
+-    *captype = get_captype_utf8(dest_utf, langnum);
++    len = u8_u16(workbuf, word);
++    *captype = get_captype_utf8(workbuf, langnum);
+   } else {
+     len = word.size();
+     *captype = get_captype(word, csconv);
+@@ -376,6 +375,11 @@ int HashMgr::get_clen_and_captype(const std::string& word, int* captype) {
+   return len;
+ }
+ 
++int HashMgr::get_clen_and_captype(const std::string& word, int* captype) {
++  std::vector<w_char> workbuf;
++  return get_clen_and_captype(word, captype, workbuf);
++}
++
+ // remove word (personal dictionary function for standalone applications)
+ int HashMgr::remove(const std::string& word) {
+   struct hentry* dp = lookup(word.c_str());
+@@ -527,6 +531,8 @@ int HashMgr::load_tables(const char* tpath, const char* key) {
+   // loop through all words on much list and add to hash
+   // table and create word and affix strings
+ 
++  std::vector<w_char> workbuf;
++
+   while (dict->getline(ts)) {
+     mychomp(ts);
+     // split each line into word and morphological description
+@@ -599,7 +605,7 @@ int HashMgr::load_tables(const char* tpath, const char* key) {
+     }
+ 
+     int captype;
+-    int wcl = get_clen_and_captype(ts, &captype);
++    int wcl = get_clen_and_captype(ts, &captype, workbuf);
+     const std::string *dp_str = dp.empty() ? NULL : &dp;
+     // add the word and its index plus its capitalized form optionally
+     if (add_word(ts, wcl, flags, al, dp_str, false) ||
+diff --git a/src/hunspell/hashmgr.hxx b/src/hunspell/hashmgr.hxx
+index 812171a..5a09c45 100644
+--- a/src/hunspell/hashmgr.hxx
++++ b/src/hunspell/hashmgr.hxx
+@@ -125,6 +125,7 @@ class HashMgr {
+ 
+  private:
+   int get_clen_and_captype(const std::string& word, int* captype);
++  int get_clen_and_captype(const std::string& word, int* captype, std::vector<w_char> &workbuf);
+   int load_tables(const char* tpath, const char* key);
+   int add_word(const std::string& word,
+                int wcl,
+-- 
+2.9.3
+
diff --git a/external/hunspell/0002-rename-std-vector-w_char-to-wide-string.patch b/external/hunspell/0002-rename-std-vector-w_char-to-wide-string.patch
deleted file mode 100644
index 31b8c04..0000000
--- a/external/hunspell/0002-rename-std-vector-w_char-to-wide-string.patch
+++ /dev/null
@@ -1,912 +0,0 @@
-From 3a935abd0539143ee952d2f86ec513be6a056d5e Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm at redhat.com>
-Date: Mon, 23 Jan 2017 13:35:13 +0000
-Subject: [PATCH 2/3] rename std::vector<w_char> to wide::string
-
----
- src/hunspell/affixmgr.cxx   |  8 ++---
- src/hunspell/affixmgr.hxx   | 10 +++----
- src/hunspell/csutil.cxx     | 28 +++++++++---------
- src/hunspell/csutil.hxx     | 26 ++++++++--------
- src/hunspell/hashmgr.cxx    | 12 ++++----
- src/hunspell/hashmgr.hxx    |  2 +-
- src/hunspell/hunspell.cxx   | 46 ++++++++++++++---------------
- src/hunspell/hunspell.hxx   |  2 +-
- src/hunspell/suggestmgr.cxx | 72 ++++++++++++++++++++++-----------------------
- src/hunspell/suggestmgr.hxx | 12 ++++----
- src/hunspell/w_char.hxx     |  6 ++++
- src/parsers/textparser.cxx  |  2 +-
- src/tools/hunspell.cxx      | 10 +++----
- 13 files changed, 121 insertions(+), 115 deletions(-)
-
-diff --git a/src/hunspell/affixmgr.cxx b/src/hunspell/affixmgr.cxx
-index 21cf384..4f64721 100644
---- a/src/hunspell/affixmgr.cxx
-+++ b/src/hunspell/affixmgr.cxx
-@@ -1338,7 +1338,7 @@ int AffixMgr::cpdcase_check(const char* word, int pos) {
-     for (p = word + pos - 1; (*p & 0xc0) == 0x80; p--)
-       ;
-     std::string pair(p);
--    std::vector<w_char> pair_u;
-+    wide::string pair_u;
-     u8_u16(pair_u, pair);
-     unsigned short a = pair_u.size() > 1 ? ((pair_u[1].h << 8) + pair_u[1].l) : 0;
-     unsigned short b = !pair_u.empty() ? ((pair_u[0].h << 8) + pair_u[0].l) : 0;
-@@ -1523,7 +1523,7 @@ short AffixMgr::get_syllable(const std::string& word) {
-       }
-     }
-   } else if (!cpdvowels_utf16.empty()) {
--    std::vector<w_char> w;
-+    wide::string w;
-     u8_u16(w, word);
-     for (size_t i = 0; i < w.size(); ++i) {
-       if (std::binary_search(cpdvowels_utf16.begin(),
-@@ -3505,7 +3505,7 @@ const char* AffixMgr::get_ignore() const {
- }
- 
- // return the preferred ignore string for suggestions
--const std::vector<w_char>& AffixMgr::get_ignore_utf16() const {
-+const wide::string& AffixMgr::get_ignore_utf16() const {
-   return ignorechars_utf16;
- }
- 
-@@ -3528,7 +3528,7 @@ const std::string& AffixMgr::get_wordchars() const {
-   return wordchars;
- }
- 
--const std::vector<w_char>& AffixMgr::get_wordchars_utf16() const {
-+const wide::string& AffixMgr::get_wordchars_utf16() const {
-   return wordchars_utf16;
- }
- 
-diff --git a/src/hunspell/affixmgr.hxx b/src/hunspell/affixmgr.hxx
-index 83a4b42..11f1a67 100644
---- a/src/hunspell/affixmgr.hxx
-+++ b/src/hunspell/affixmgr.hxx
-@@ -146,7 +146,7 @@ class AffixMgr {
-   int cpdwordmax;
-   int cpdmaxsyllable;
-   std::string cpdvowels; // vowels (for calculating of Hungarian compounding limit,
--  std::vector<w_char> cpdvowels_utf16; //vowels for UTF-8 encoding
-+  wide::string cpdvowels_utf16; //vowels for UTF-8 encoding
-   std::string cpdsyllablenum; // syllable count incrementing flag
-   const char* pfxappnd;  // BUG: not stateless
-   const char* sfxappnd;  // BUG: not stateless
-@@ -157,9 +157,9 @@ class AffixMgr {
-   PfxEntry* pfx;         // BUG: not stateless
-   int checknum;
-   std::string wordchars; // letters + spec. word characters
--  std::vector<w_char> wordchars_utf16;
-+  wide::string wordchars_utf16;
-   std::string ignorechars; // letters + spec. word characters
--  std::vector<w_char> ignorechars_utf16;
-+  wide::string ignorechars_utf16;
-   std::string version;   // affix and dictionary file version string
-   std::string lang;	 // language
-   int langnum;
-@@ -306,9 +306,9 @@ class AffixMgr {
-   char* get_key_string();
-   char* get_try_string() const;
-   const std::string& get_wordchars() const;
--  const std::vector<w_char>& get_wordchars_utf16() const;
-+  const wide::string& get_wordchars_utf16() const;
-   const char* get_ignore() const;
--  const std::vector<w_char>& get_ignore_utf16() const;
-+  const wide::string& get_ignore_utf16() const;
-   int get_compound() const;
-   FLAG get_compoundflag() const;
-   FLAG get_forbiddenword() const;
-diff --git a/src/hunspell/csutil.cxx b/src/hunspell/csutil.cxx
-index c1666a5..2f59b3d 100644
---- a/src/hunspell/csutil.cxx
-+++ b/src/hunspell/csutil.cxx
-@@ -143,10 +143,10 @@ void myopen(std::ifstream& stream, const char* path, std::ios_base::openmode mod
-   stream.open(path, mode);
- }
- 
--std::string& u16_u8(std::string& dest, const std::vector<w_char>& src) {
-+std::string& u16_u8(std::string& dest, const wide::string& src) {
-   dest.clear();
--  std::vector<w_char>::const_iterator u2 = src.begin();
--  std::vector<w_char>::const_iterator u2_max = src.end();
-+  wide::string::const_iterator u2 = src.begin();
-+  wide::string::const_iterator u2_max = src.end();
-   while (u2 < u2_max) {
-     signed char u8;
-     if (u2->h) {  // > 0xFF
-@@ -180,7 +180,7 @@ std::string& u16_u8(std::string& dest, const std::vector<w_char>& src) {
-   return dest;
- }
- 
--int u8_u16(std::vector<w_char>& dest, const std::string& src) {
-+int u8_u16(wide::string& dest, const std::string& src) {
-   dest.clear();
-   std::string::const_iterator u8 = src.begin();
-   std::string::const_iterator u8_max = src.end();
-@@ -474,7 +474,7 @@ size_t reverseword(std::string& word) {
- 
- // reverse word
- size_t reverseword_utf(std::string& word) {
--  std::vector<w_char> w;
-+  wide::string w;
-   u8_u16(w, word);
-   std::reverse(w.begin(), w.end());
-   u16_u8(word, w);
-@@ -552,7 +552,7 @@ std::string& mkallsmall(std::string& s, const struct cs_info* csconv) {
-   return s;
- }
- 
--std::vector<w_char>& mkallsmall_utf(std::vector<w_char>& u,
-+wide::string& mkallsmall_utf(wide::string& u,
-                                           int langnum) {
-   for (size_t i = 0; i < u.size(); ++i) {
-     unsigned short idx = (u[i].h << 8) + u[i].l;
-@@ -565,7 +565,7 @@ std::vector<w_char>& mkallsmall_utf(std::vector<w_char>& u,
-   return u;
- }
- 
--std::vector<w_char>& mkallcap_utf(std::vector<w_char>& u, int langnum) {
-+wide::string& mkallcap_utf(wide::string& u, int langnum) {
-   for (size_t i = 0; i < u.size(); i++) {
-     unsigned short idx = (u[i].h << 8) + u[i].l;
-     unsigned short upridx = unicodetoupper(idx, langnum);
-@@ -584,7 +584,7 @@ std::string& mkinitcap(std::string& s, const struct cs_info* csconv) {
-   return s;
- }
- 
--std::vector<w_char>& mkinitcap_utf(std::vector<w_char>& u, int langnum) {
-+wide::string& mkinitcap_utf(wide::string& u, int langnum) {
-   if (!u.empty()) {
-     unsigned short idx = (u[0].h << 8) + u[0].l;
-     unsigned short upridx = unicodetoupper(idx, langnum);
-@@ -603,7 +603,7 @@ std::string& mkinitsmall(std::string& s, const struct cs_info* csconv) {
-   return s;
- }
- 
--std::vector<w_char>& mkinitsmall_utf(std::vector<w_char>& u, int langnum) {
-+wide::string& mkinitsmall_utf(wide::string& u, int langnum) {
-   if (!u.empty()) {
-     unsigned short idx = (u[0].h << 8) + u[0].l;
-     unsigned short lwridx = unicodetolower(idx, langnum);
-@@ -2532,7 +2532,7 @@ int get_captype(const std::string& word, cs_info* csconv) {
-   return HUHCAP;
- }
- 
--int get_captype_utf8(const std::vector<w_char>& word, int langnum) {
-+int get_captype_utf8(const wide::string& word, int langnum) {
-   // now determine the capitalization type of the first nl letters
-   size_t ncap = 0;
-   size_t nneutral = 0;
-@@ -2565,9 +2565,9 @@ int get_captype_utf8(const std::vector<w_char>& word, int langnum) {
- 
- // strip all ignored characters in the string
- size_t remove_ignored_chars_utf(std::string& word,
--                                const std::vector<w_char>& ignored_chars) {
--  std::vector<w_char> w;
--  std::vector<w_char> w2;
-+                                const wide::string& ignored_chars) {
-+  wide::string w;
-+  wide::string w2;
-   u8_u16(w, word);
- 
-   for (size_t i = 0; i < w.size(); ++i) {
-@@ -2626,7 +2626,7 @@ bool parse_string(const std::string& line, std::string& out, int ln) {
- 
- bool parse_array(const std::string& line,
-                  std::string& out,
--                 std::vector<w_char>& out_utf16,
-+                 wide::string& out_utf16,
-                  int utf8,
-                  int ln) {
-   if (!parse_string(line, out, ln))
-diff --git a/src/hunspell/csutil.hxx b/src/hunspell/csutil.hxx
-index 302d7e9..313672e 100644
---- a/src/hunspell/csutil.hxx
-+++ b/src/hunspell/csutil.hxx
-@@ -134,10 +134,10 @@ LIBHUNSPELL_DLL_EXPORTED void myopen(std::ifstream& stream, const char* path,
- 
- // convert UTF-16 characters to UTF-8
- LIBHUNSPELL_DLL_EXPORTED std::string& u16_u8(std::string& dest,
--                                             const std::vector<w_char>& src);
-+                                             const wide::string& src);
- 
- // convert UTF-8 characters to UTF-16
--LIBHUNSPELL_DLL_EXPORTED int u8_u16(std::vector<w_char>& dest,
-+LIBHUNSPELL_DLL_EXPORTED int u8_u16(wide::string& dest,
-                                     const std::string& src);
- 
- // remove end of line char(s)
-@@ -219,31 +219,31 @@ LIBHUNSPELL_DLL_EXPORTED std::string& mkinitcap(std::string& s,
-                                                 const struct cs_info* csconv);
- 
- // convert first letter of UTF-8 string to capital
--LIBHUNSPELL_DLL_EXPORTED std::vector<w_char>&
--mkinitcap_utf(std::vector<w_char>& u, int langnum);
-+LIBHUNSPELL_DLL_EXPORTED wide::string&
-+mkinitcap_utf(wide::string& u, int langnum);
- 
- // convert UTF-8 string to little
--LIBHUNSPELL_DLL_EXPORTED std::vector<w_char>&
--mkallsmall_utf(std::vector<w_char>& u, int langnum);
-+LIBHUNSPELL_DLL_EXPORTED wide::string&
-+mkallsmall_utf(wide::string& u, int langnum);
- 
- // convert first letter of UTF-8 string to little
--LIBHUNSPELL_DLL_EXPORTED std::vector<w_char>&
--mkinitsmall_utf(std::vector<w_char>& u, int langnum);
-+LIBHUNSPELL_DLL_EXPORTED wide::string&
-+mkinitsmall_utf(wide::string& u, int langnum);
- 
- // convert UTF-8 string to capital
--LIBHUNSPELL_DLL_EXPORTED std::vector<w_char>&
--mkallcap_utf(std::vector<w_char>& u, int langnum);
-+LIBHUNSPELL_DLL_EXPORTED wide::string&
-+mkallcap_utf(wide::string& u, int langnum);
- 
- // get type of capitalization
- LIBHUNSPELL_DLL_EXPORTED int get_captype(const std::string& q, cs_info*);
- 
- // get type of capitalization (UTF-8)
--LIBHUNSPELL_DLL_EXPORTED int get_captype_utf8(const std::vector<w_char>& q, int langnum);
-+LIBHUNSPELL_DLL_EXPORTED int get_captype_utf8(const wide::string& q, int langnum);
- 
- // strip all ignored characters in the string
- LIBHUNSPELL_DLL_EXPORTED size_t remove_ignored_chars_utf(
-     std::string& word,
--    const std::vector<w_char>& ignored_chars);
-+    const wide::string& ignored_chars);
- 
- // strip all ignored characters in the string
- LIBHUNSPELL_DLL_EXPORTED size_t remove_ignored_chars(
-@@ -256,7 +256,7 @@ LIBHUNSPELL_DLL_EXPORTED bool parse_string(const std::string& line,
- 
- LIBHUNSPELL_DLL_EXPORTED bool parse_array(const std::string& line,
-                                           std::string& out,
--                                          std::vector<w_char>& out_utf16,
-+                                          wide::string& out_utf16,
-                                           int utf8,
-                                           int ln);
- 
-diff --git a/src/hunspell/hashmgr.cxx b/src/hunspell/hashmgr.cxx
-index 1de1690..6d92e9b 100644
---- a/src/hunspell/hashmgr.cxx
-+++ b/src/hunspell/hashmgr.cxx
-@@ -345,7 +345,7 @@ int HashMgr::add_hidden_capitalized_word(const std::string& word,
-     flags2[flagslen] = ONLYUPCASEFLAG;
-     if (utf8) {
-       std::string st;
--      std::vector<w_char> w;
-+      wide::string w;
-       u8_u16(w, word);
-       mkallsmall_utf(w, langnum);
-       mkinitcap_utf(w, langnum);
-@@ -366,7 +366,7 @@ int HashMgr::add_hidden_capitalized_word(const std::string& word,
- int HashMgr::get_clen_and_captype(const std::string& word, int* captype) {
-   int len;
-   if (utf8) {
--    std::vector<w_char> dest_utf;
-+    wide::string dest_utf;
-     len = u8_u16(dest_utf, word);
-     *captype = get_captype_utf8(dest_utf, langnum);
-   } else {
-@@ -688,7 +688,7 @@ int HashMgr::decode_flags(unsigned short** result, const std::string& flags, Fil
-       break;
-     }
-     case FLAG_UNI: {  // UTF-8 characters
--      std::vector<w_char> w;
-+      wide::string w;
-       u8_u16(w, flags);
-       len = w.size();
-       *result = (unsigned short*)malloc(len * sizeof(unsigned short));
-@@ -760,7 +760,7 @@ bool HashMgr::decode_flags(std::vector<unsigned short>& result, const std::strin
-       break;
-     }
-     case FLAG_UNI: {  // UTF-8 characters
--      std::vector<w_char> w;
-+      wide::string w;
-       u8_u16(w, flags);
-       size_t len = w.size();
-       size_t origsize = result.size();
-@@ -793,7 +793,7 @@ unsigned short HashMgr::decode_flag(const char* f) const {
-       s = (unsigned short)i;
-       break;
-     case FLAG_UNI: {
--      std::vector<w_char> w;
-+      wide::string w;
-       u8_u16(w, f);
-       if (!w.empty())
-           memcpy(&s, &w[0], 1 * sizeof(short));
-@@ -820,7 +820,7 @@ char* HashMgr::encode_flag(unsigned short f) const {
-     ch = stream.str();
-   } else if (flag_mode == FLAG_UNI) {
-     const w_char* w_c = (const w_char*)&f;
--    std::vector<w_char> w(w_c, w_c + 1);
-+    wide::string w(w_c, w_c + 1);
-     u16_u8(ch, w);
-   } else {
-     ch.push_back((unsigned char)(f));
-diff --git a/src/hunspell/hashmgr.hxx b/src/hunspell/hashmgr.hxx
-index 812171a..312c8ba 100644
---- a/src/hunspell/hashmgr.hxx
-+++ b/src/hunspell/hashmgr.hxx
-@@ -96,7 +96,7 @@ class HashMgr {
-   std::string lang;
-   struct cs_info* csconv;
-   std::string ignorechars;
--  std::vector<w_char> ignorechars_utf16;
-+  wide::string ignorechars_utf16;
-   int numaliasf;  // flag vector `compression' with aliases
-   unsigned short** aliasf;
-   unsigned short* aliasflen;
-diff --git a/src/hunspell/hunspell.cxx b/src/hunspell/hunspell.cxx
-index a8d78dc..46f1df9 100644
---- a/src/hunspell/hunspell.cxx
-+++ b/src/hunspell/hunspell.cxx
-@@ -103,7 +103,7 @@ public:
-   bool spell(const std::string& word, int* info = NULL, std::string* root = NULL);
-   std::vector<std::string> suggest(const std::string& word);
-   const std::string& get_wordchars() const;
--  const std::vector<w_char>& get_wordchars_utf16() const;
-+  const wide::string& get_wordchars_utf16() const;
-   const std::string& get_dict_encoding() const;
-   int add(const std::string& word);
-   int add_with_affix(const std::string& word, const std::string& example);
-@@ -127,15 +127,15 @@ private:
- private:
-   void cleanword(std::string& dest, const std::string&, int* pcaptype, int* pabbrev);
-   size_t cleanword2(std::string& dest,
--                    std::vector<w_char>& dest_u,
-+                    wide::string& dest_u,
-                     const std::string& src,
-                     int* pcaptype,
-                     size_t* pabbrev);
-   void mkinitcap(std::string& u8);
--  int mkinitcap2(std::string& u8, std::vector<w_char>& u16);
--  int mkinitsmall2(std::string& u8, std::vector<w_char>& u16);
-+  int mkinitcap2(std::string& u8, wide::string& u16);
-+  int mkinitsmall2(std::string& u8, wide::string& u16);
-   void mkallcap(std::string& u8);
--  int mkallsmall2(std::string& u8, std::vector<w_char>& u16);
-+  int mkallsmall2(std::string& u8, wide::string& u16);
-   struct hentry* checkword(const std::string& source, int* info, std::string* root);
-   std::string sharps_u8_l1(const std::string& source);
-   hentry*
-@@ -231,7 +231,7 @@ int HunspellImpl::add_dic(const char* dpath, const char* key) {
- // return the length of the "cleaned" (and UTF-8 encoded) word
- 
- size_t HunspellImpl::cleanword2(std::string& dest,
--                         std::vector<w_char>& dest_utf,
-+                         wide::string& dest_utf,
-                          const std::string& src,
-                          int* pcaptype,
-                          size_t* pabbrev) {
-@@ -313,7 +313,7 @@ void HunspellImpl::cleanword(std::string& dest,
-     // remember to terminate the destination string
-     firstcap = csconv[static_cast<unsigned char>(dest[0])].ccase;
-   } else {
--    std::vector<w_char> t;
-+    wide::string t;
-     u8_u16(t, src);
-     for (size_t i = 0; i < t.size(); ++i) {
-       unsigned short idx = (t[i].h << 8) + t[i].l;
-@@ -346,7 +346,7 @@ void HunspellImpl::cleanword(std::string& dest,
- 
- void HunspellImpl::mkallcap(std::string& u8) {
-   if (utf8) {
--    std::vector<w_char> u16;
-+    wide::string u16;
-     u8_u16(u16, u8);
-     ::mkallcap_utf(u16, langnum);
-     u16_u8(u8, u16);
-@@ -355,7 +355,7 @@ void HunspellImpl::mkallcap(std::string& u8) {
-   }
- }
- 
--int HunspellImpl::mkallsmall2(std::string& u8, std::vector<w_char>& u16) {
-+int HunspellImpl::mkallsmall2(std::string& u8, wide::string& u16) {
-   if (utf8) {
-     ::mkallsmall_utf(u16, langnum);
-     u16_u8(u8, u16);
-@@ -438,7 +438,7 @@ bool HunspellImpl::spell(const std::string& word, int* info, std::string* root)
-   size_t wl = 0;
- 
-   std::string scw;
--  std::vector<w_char> sunicw;
-+  wide::string sunicw;
- 
-   // input conversion
-   RepList* rl = pAMgr ? pAMgr->get_iconvtable() : NULL;
-@@ -519,7 +519,7 @@ bool HunspellImpl::spell(const std::string& word, int* info, std::string* root)
-           std::string part1 = scw.substr(0, apos+1);
-           std::string part2 = scw.substr(apos+1);
-           if (utf8) {
--            std::vector<w_char> part1u, part2u;
-+            wide::string part1u, part2u;
-             u8_u16(part1u, part1);
-             u8_u16(part2u, part2);
-             mkinitcap2(part2, part2u);
-@@ -704,7 +704,7 @@ struct hentry* HunspellImpl::checkword(const std::string& w, int* info, std::str
-   if (ignoredchars != NULL) {
-     w2.assign(w);
-     if (utf8) {
--      const std::vector<w_char>& ignoredchars_utf16 =
-+      const wide::string& ignoredchars_utf16 =
-           pAMgr->get_ignore_utf16();
-       remove_ignored_chars_utf(w2, ignoredchars_utf16);
-     } else {
-@@ -855,7 +855,7 @@ std::vector<std::string> HunspellImpl::suggest(const std::string& word) {
-   size_t wl = 0;
- 
-   std::string scw;
--  std::vector<w_char> sunicw;
-+  wide::string sunicw;
- 
-   // input conversion
-   RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
-@@ -909,7 +909,7 @@ std::vector<std::string> HunspellImpl::suggest(const std::string& word) {
-         std::string postdot = scw.substr(dot_pos + 1);
-         int captype_;
-         if (utf8) {
--          std::vector<w_char> postdotu;
-+          wide::string postdotu;
-           u8_u16(postdotu, postdot);
-           captype_ = get_captype_utf8(postdotu, langnum);
-         } else {
-@@ -951,7 +951,7 @@ std::vector<std::string> HunspellImpl::suggest(const std::string& word) {
-           if ((slen < wl) && strcmp(scw.c_str() + wl - slen, space + 1)) {
-             std::string first(slst[j].c_str(), space + 1);
-             std::string second(space + 1);
--            std::vector<w_char> w;
-+            wide::string w;
-             if (utf8)
-               u8_u16(w, second);
-             mkinitcap2(second, w);
-@@ -1109,7 +1109,7 @@ std::vector<std::string> HunspellImpl::suggest(const std::string& word) {
-         for (size_t j = 0; j < slst.size(); ++j) {
-           if (slst[j].find(' ') == std::string::npos && !spell(slst[j])) {
-             std::string s;
--            std::vector<w_char> w;
-+            wide::string w;
-             if (utf8) {
-               u8_u16(w, slst[j]);
-             } else {
-@@ -1262,17 +1262,17 @@ const std::string& HunspellImpl::get_wordchars() const {
-   return pAMgr->get_wordchars();
- }
- 
--const std::vector<w_char>& Hunspell::get_wordchars_utf16() const {
-+const wide::string& Hunspell::get_wordchars_utf16() const {
-   return m_Impl->get_wordchars_utf16();
- }
- 
--const std::vector<w_char>& HunspellImpl::get_wordchars_utf16() const {
-+const wide::string& HunspellImpl::get_wordchars_utf16() const {
-   return pAMgr->get_wordchars_utf16();
- }
- 
- void HunspellImpl::mkinitcap(std::string& u8) {
-   if (utf8) {
--    std::vector<w_char> u16;
-+    wide::string u16;
-     u8_u16(u16, u8);
-     ::mkinitcap_utf(u16, langnum);
-     u16_u8(u8, u16);
-@@ -1281,7 +1281,7 @@ void HunspellImpl::mkinitcap(std::string& u8) {
-   }
- }
- 
--int HunspellImpl::mkinitcap2(std::string& u8, std::vector<w_char>& u16) {
-+int HunspellImpl::mkinitcap2(std::string& u8, wide::string& u16) {
-   if (utf8) {
-     ::mkinitcap_utf(u16, langnum);
-     u16_u8(u8, u16);
-@@ -1291,7 +1291,7 @@ int HunspellImpl::mkinitcap2(std::string& u8, std::vector<w_char>& u16) {
-   return u8.size();
- }
- 
--int HunspellImpl::mkinitsmall2(std::string& u8, std::vector<w_char>& u16) {
-+int HunspellImpl::mkinitsmall2(std::string& u8, wide::string& u16) {
-   if (utf8) {
-     ::mkinitsmall_utf(u16, langnum);
-     u16_u8(u8, u16);
-@@ -1379,7 +1379,7 @@ std::vector<std::string> HunspellImpl::analyze(const std::string& word) {
-   size_t wl = 0;
- 
-   std::string scw;
--  std::vector<w_char> sunicw;
-+  wide::string sunicw;
- 
-   // input conversion
-   RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
-@@ -1994,7 +1994,7 @@ std::vector<std::string> HunspellImpl::suffix_suggest(const std::string& root_wo
-   if (ignoredchars != NULL) {
-     w2.assign(root_word);
-     if (utf8) {
--      const std::vector<w_char>& ignoredchars_utf16 =
-+      const wide::string& ignoredchars_utf16 =
-           pAMgr->get_ignore_utf16();
-       remove_ignored_chars_utf(w2, ignoredchars_utf16);
-     } else {
-diff --git a/src/hunspell/hunspell.hxx b/src/hunspell/hunspell.hxx
-index 43af66b..375a7da 100644
---- a/src/hunspell/hunspell.hxx
-+++ b/src/hunspell/hunspell.hxx
-@@ -215,7 +215,7 @@ class LIBHUNSPELL_DLL_EXPORTED Hunspell {
-   /* get extra word characters definied in affix file for tokenization */
-   const char* get_wordchars() const;
-   const std::string& get_wordchars_cpp() const;
--  const std::vector<w_char>& get_wordchars_utf16() const;
-+  const wide::string& get_wordchars_utf16() const;
- 
-   struct cs_info* get_csconv();
-   
-diff --git a/src/hunspell/suggestmgr.cxx b/src/hunspell/suggestmgr.cxx
-index 54a474f..1deec96 100644
---- a/src/hunspell/suggestmgr.cxx
-+++ b/src/hunspell/suggestmgr.cxx
-@@ -179,7 +179,7 @@ void SuggestMgr::suggest(std::vector<std::string>& slst,
-                         const char* w,
-                         int* onlycompoundsug) {
-   int nocompoundtwowords = 0;
--  std::vector<w_char> word_utf;
-+  wide::string word_utf;
-   int wl = 0;
-   size_t nsugorig = slst.size();
-   std::string w2;
-@@ -313,7 +313,7 @@ void SuggestMgr::capchars_utf(std::vector<std::string>& wlst,
-                               const w_char* word,
-                               int wl,
-                               int cpdsuggest) {
--  std::vector<w_char> candidate_utf(word, word + wl);
-+  wide::string candidate_utf(word, word + wl);
-   mkallcap_utf(candidate_utf, langnum);
-   std::string candidate;
-   u16_u8(candidate, candidate_utf);
-@@ -491,7 +491,7 @@ int SuggestMgr::doubletwochars_utf(std::vector<std::string>& wlst,
-     if (word[i] == word[i - 2]) {
-       state++;
-       if (state == 3) {
--        std::vector<w_char> candidate_utf(word, word + i - 1);
-+        wide::string candidate_utf(word, word + i - 1);
-         candidate_utf.insert(candidate_utf.end(), word + i + 1, word + wl);
-         std::string candidate;
-         u16_u8(candidate, candidate_utf);
-@@ -549,7 +549,7 @@ int SuggestMgr::badcharkey_utf(std::vector<std::string>& wlst,
-                                int wl,
-                                int cpdsuggest) {
-   std::string candidate;
--  std::vector<w_char> candidate_utf(word, word + wl);
-+  wide::string candidate_utf(word, word + wl);
-   // swap out each char one by one and try all the tryme
-   // chars in its place to see if that makes a good word
-   for (int i = 0; i < wl; i++) {
-@@ -614,7 +614,7 @@ int SuggestMgr::badchar_utf(std::vector<std::string>& wlst,
-                             const w_char* word,
-                             int wl,
-                             int cpdsuggest) {
--  std::vector<w_char> candidate_utf(word, word + wl);
-+  wide::string candidate_utf(word, word + wl);
-   std::string candidate;
-   clock_t timelimit = clock();
-   int timer = MINTIMER;
-@@ -641,7 +641,7 @@ int SuggestMgr::extrachar_utf(std::vector<std::string>& wlst,
-                               const w_char* word,
-                               int wl,
-                               int cpdsuggest) {
--  std::vector<w_char> candidate_utf(word, word + wl);
-+  wide::string candidate_utf(word, word + wl);
-   if (candidate_utf.size() < 2)
-     return wlst.size();
-   // try omitting one char of word at a time
-@@ -703,7 +703,7 @@ int SuggestMgr::forgotchar_utf(std::vector<std::string>& wlst,
-                                const w_char* word,
-                                int wl,
-                                int cpdsuggest) {
--  std::vector<w_char> candidate_utf(word, word + wl);
-+  wide::string candidate_utf(word, word + wl);
-   clock_t timelimit = clock();
-   int timer = MINTIMER;
- 
-@@ -852,7 +852,7 @@ int SuggestMgr::swapchar_utf(std::vector<std::string>& wlst,
-                              const w_char* word,
-                              int wl,
-                              int cpdsuggest) {
--  std::vector<w_char> candidate_utf(word, word + wl);
-+  wide::string candidate_utf(word, word + wl);
-   if (candidate_utf.size() < 2)
-     return wlst.size();
- 
-@@ -909,10 +909,10 @@ int SuggestMgr::longswapchar_utf(std::vector<std::string>& wlst,
-                                  const w_char* word,
-                                  int wl,
-                                  int cpdsuggest) {
--  std::vector<w_char> candidate_utf(word, word + wl);
-+  wide::string candidate_utf(word, word + wl);
-   // try swapping not adjacent chars
--  for (std::vector<w_char>::iterator p = candidate_utf.begin(); p < candidate_utf.end(); ++p) {
--    for (std::vector<w_char>::iterator q = candidate_utf.begin(); q < candidate_utf.end(); ++q) {
-+  for (wide::string::iterator p = candidate_utf.begin(); p < candidate_utf.end(); ++p) {
-+    for (wide::string::iterator q = candidate_utf.begin(); q < candidate_utf.end(); ++q) {
-       if (std::abs(std::distance(q, p)) > 1) {
-         std::swap(*p, *q);
-         std::string candidate;
-@@ -962,13 +962,13 @@ int SuggestMgr::movechar_utf(std::vector<std::string>& wlst,
-                              const w_char* word,
-                              int wl,
-                              int cpdsuggest) {
--  std::vector<w_char> candidate_utf(word, word + wl);
-+  wide::string candidate_utf(word, word + wl);
-   if (candidate_utf.size() < 2)
-     return wlst.size();
- 
-   // try moving a char
--  for (std::vector<w_char>::iterator p = candidate_utf.begin(); p < candidate_utf.end(); ++p) {
--    for (std::vector<w_char>::iterator q = p + 1; q < candidate_utf.end() && std::distance(p, q) < 10; ++q) {
-+  for (wide::string::iterator p = candidate_utf.begin(); p < candidate_utf.end(); ++p) {
-+    for (wide::string::iterator q = p + 1; q < candidate_utf.end() && std::distance(p, q) < 10; ++q) {
-       std::swap(*q, *(q - 1));
-       if (std::distance(p, q) < 2)
-         continue;  // omit swap char
-@@ -979,8 +979,8 @@ int SuggestMgr::movechar_utf(std::vector<std::string>& wlst,
-     std::copy(word, word + candidate_utf.size(), candidate_utf.begin());
-   }
- 
--  for (std::vector<w_char>::reverse_iterator p = candidate_utf.rbegin(); p < candidate_utf.rend(); ++p) {
--    for (std::vector<w_char>::reverse_iterator q = p + 1; q < candidate_utf.rend() && std::distance(p, q) < 10; ++q) {
-+  for (wide::string::reverse_iterator p = candidate_utf.rbegin(); p < candidate_utf.rend(); ++p) {
-+    for (wide::string::reverse_iterator q = p + 1; q < candidate_utf.rend() && std::distance(p, q) < 10; ++q) {
-       std::swap(*q, *(q - 1));
-       if (std::distance(p, q) < 2)
-         continue;  // omit swap char
-@@ -1032,7 +1032,7 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
-     word = w2.c_str();
-   }
- 
--  std::vector<w_char> u8;
-+  wide::string u8;
-   int nc = strlen(word);
-   int n = (utf8) ? u8_u16(u8, word) : nc;
- 
-@@ -1050,7 +1050,7 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
-   phonetable* ph = (pAMgr) ? pAMgr->get_phonetable() : NULL;
-   std::string target;
-   std::string candidate;
--  std::vector<w_char> w_candidate;
-+  wide::string w_candidate;
-   if (ph) {
-     if (utf8) {
-       u8_u16(w_candidate, word);
-@@ -1069,16 +1069,16 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
-   FLAG nongramsuggest = pAMgr ? pAMgr->get_nongramsuggest() : FLAG_NULL;
-   FLAG onlyincompound = pAMgr ? pAMgr->get_onlyincompound() : FLAG_NULL;
- 
--  std::vector<w_char> w_word, w_target;
-+  wide::string w_word, w_target;
-   if (utf8) {
-     u8_u16(w_word, word);
-     u8_u16(w_target, target);
-   }
-   
--  std::vector<w_char> w_entry;
-+  wide::string w_entry;
-   std::string f;
--  std::vector<w_char> w_f;
--  std::vector<w_char> w_target2;
-+  wide::string w_f;
-+  wide::string w_target2;
-   
-   for (size_t i = 0; i < rHMgr.size(); ++i) {
-     while (0 != (hp = rHMgr[i]->walk_hashtable(col, hp))) {
-@@ -1168,7 +1168,7 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
-   // find minimum threshold for a passable suggestion
-   // mangle original word three differnt ways
-   // and score them to generate a minimum acceptable score
--  std::vector<w_char> w_mw;
-+  wide::string w_mw;
-   int thresh = 0;
-   for (int sp = 1; sp < 4; sp++) {
-     if (utf8) {
-@@ -1210,7 +1210,7 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
-     return;
-   }
- 
--  std::vector<w_char> w_glst_word;
-+  wide::string w_glst_word;
-   for (int i = 0; i < MAX_ROOTS; i++) {
-     if (roots[i]) {
-       struct hentry* rp = roots[i];
-@@ -1288,7 +1288,7 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
-       fact = (10.0 - maxd) / 5.0;
-   }
- 
--  std::vector<w_char> w_gl;
-+  wide::string w_gl;
-   for (int i = 0; i < MAX_GUESS; i++) {
-     if (guess[i]) {
-       // lowering guess[i]
-@@ -1789,8 +1789,8 @@ std::string SuggestMgr::suggest_gen(const std::vector<std::string>& desc, const
- 
- // generate an n-gram score comparing s1 and s2, UTF16 version
- int SuggestMgr::ngram(int n,
--                      const std::vector<w_char>& su1,
--                      const std::vector<w_char>& su2,
-+                      const wide::string& su1,
-+                      const wide::string& su2,
-                       int opt) {
-   int nscore = 0;
-   int ns;
-@@ -1803,8 +1803,8 @@ int SuggestMgr::ngram(int n,
-   if (l2 == 0)
-     return 0;
-   // lowering dictionary word
--  const std::vector<w_char>* p_su2 = &su2;
--  std::vector<w_char> su2_copy;
-+  const wide::string* p_su2 = &su2;
-+  wide::string su2_copy;
-   if (opt & NGRAM_LOWERING) {
-     su2_copy = su2;
-     mkallsmall_utf(su2_copy, langnum);
-@@ -1894,8 +1894,8 @@ int SuggestMgr::ngram(int n,
- 
- // length of the left common substring of s1 and (decapitalised) s2, UTF version
- int SuggestMgr::leftcommonsubstring(
--    const std::vector<w_char>& su1,
--    const std::vector<w_char>& su2) {
-+    const wide::string& su1,
-+    const wide::string& su2) {
-   int l1 = su1.size();
-   int l2 = su2.size();
-   // decapitalize dictionary word
-@@ -1948,8 +1948,8 @@ int SuggestMgr::commoncharacterpositions(const char* s1,
-   int diffpos[2];
-   *is_swap = 0;
-   if (utf8) {
--    std::vector<w_char> su1;
--    std::vector<w_char> su2;
-+    wide::string su1;
-+    wide::string su2;
-     int l1 = u8_u16(su1, s1);
-     int l2 = u8_u16(su2, s2);
- 
-@@ -2004,7 +2004,7 @@ int SuggestMgr::commoncharacterpositions(const char* s1,
- 
- int SuggestMgr::mystrlen(const char* word) {
-   if (utf8) {
--    std::vector<w_char> w;
-+    wide::string w;
-     return u8_u16(w, word);
-   } else
-     return strlen(word);
-@@ -2044,8 +2044,8 @@ void SuggestMgr::lcs(const char* s,
-                      int* l2,
-                      char** result) {
-   int n, m;
--  std::vector<w_char> su;
--  std::vector<w_char> su2;
-+  wide::string su;
-+  wide::string su2;
-   char* b;
-   char* c;
-   int i;
-diff --git a/src/hunspell/suggestmgr.hxx b/src/hunspell/suggestmgr.hxx
-index 6ba9dc8..9bfa933 100644
---- a/src/hunspell/suggestmgr.hxx
-+++ b/src/hunspell/suggestmgr.hxx
-@@ -107,11 +107,11 @@ class SuggestMgr {
-  private:
-   char* ckey;
-   size_t ckeyl;
--  std::vector<w_char> ckey_utf;
-+  wide::string ckey_utf;
- 
-   char* ctry;
-   size_t ctryl;
--  std::vector<w_char> ctry_utf;
-+  wide::string ctry_utf;
- 
-   AffixMgr* pAMgr;
-   unsigned int maxSug;
-@@ -173,12 +173,12 @@ class SuggestMgr {
-                   const std::vector<mapentry>&,
-                   int*,
-                   clock_t*);
--  int ngram(int n, const std::vector<w_char>& su1,
--            const std::vector<w_char>& su2, int opt);
-+  int ngram(int n, const wide::string& su1,
-+            const wide::string& su2, int opt);
-   int ngram(int n, const std::string& s1, const std::string& s2, int opt);
-   int mystrlen(const char* word);
--  int leftcommonsubstring(const std::vector<w_char>& su1,
--                          const std::vector<w_char>& su2);
-+  int leftcommonsubstring(const wide::string& su1,
-+                          const wide::string& su2);
-   int leftcommonsubstring(const char* s1, const char* s2);
-   int commoncharacterpositions(const char* s1, const char* s2, int* is_swap);
-   void bubblesort(char** rwd, char** rwd2, int* rsc, int n);
-diff --git a/src/hunspell/w_char.hxx b/src/hunspell/w_char.hxx
-index c561ffc..84ae13c 100644
---- a/src/hunspell/w_char.hxx
-+++ b/src/hunspell/w_char.hxx
-@@ -42,6 +42,7 @@
- #define W_CHAR_HXX_
- 
- #include <string>
-+#include <vector>
- 
- #ifndef GCC
- struct w_char {
-@@ -72,4 +73,9 @@ struct replentry {
-   std::string outstrings[4]; // med, ini, fin, isol
- };
- 
-+namespace wide
-+{
-+    typedef std::vector<w_char> string;
-+}
-+
- #endif
-diff --git a/src/parsers/textparser.cxx b/src/parsers/textparser.cxx
-index 53548e4..8e43f79 100644
---- a/src/parsers/textparser.cxx
-+++ b/src/parsers/textparser.cxx
-@@ -81,7 +81,7 @@ int TextParser::is_wordchar(const char* w) {
-   if (*w == '\0')
-     return 0;
-   if (utf8) {
--    std::vector<w_char> wc;
-+    wide::string wc;
-     unsigned short idx;
-     u8_u16(wc, w);
-     if (wc.empty())
-diff --git a/src/tools/hunspell.cxx b/src/tools/hunspell.cxx
-index 3172409..c39f148 100644
---- a/src/tools/hunspell.cxx
-+++ b/src/tools/hunspell.cxx
-@@ -199,7 +199,7 @@ enum { FMT_TEXT, FMT_LATEX, FMT_HTML, FMT_MAN, FMT_FIRST, FMT_XML, FMT_ODF };
- std::string wordchars;
- char* dicpath = NULL;
- const w_char* wordchars_utf16 = NULL;
--std::vector<w_char> new_wordchars_utf16;
-+wide::string new_wordchars_utf16;
- int wordchars_utf16_len;
- char* dicname = NULL;
- char* privdicname = NULL;
-@@ -311,7 +311,7 @@ TextParser* get_parser(int format, const char* extension, Hunspell* pMS) {
-   }
- 
-   if (io_utf8) {
--    const std::vector<w_char>& vec_wordchars_utf16 = pMS->get_wordchars_utf16();
-+    const wide::string& vec_wordchars_utf16 = pMS->get_wordchars_utf16();
-     const std::string& vec_wordchars = pMS->get_wordchars_cpp();
-     wordchars_utf16_len = vec_wordchars_utf16.size();
-     wordchars_utf16 = wordchars_utf16_len ? &vec_wordchars_utf16[0] : NULL;
-@@ -356,7 +356,7 @@ TextParser* get_parser(int format, const char* extension, Hunspell* pMS) {
-         ch[1] = '\0';
-         size_t res = iconv(conv, (ICONV_CONST char**)&ch8bit, &c1, &dest, &c2);
-         if (res != (size_t)-1) {
--          std::vector<w_char> w;
-+          wide::string w;
-           u8_u16(w, std::string(u8, dest));
-           unsigned short idx = w.empty() ? 0 : (w[0].h << 8) + w[0].l;
-           if (unicodeisalpha(idx)) {
-@@ -395,7 +395,7 @@ TextParser* get_parser(int format, const char* extension, Hunspell* pMS) {
-   }
- #else
-   if (strcmp(denc, "UTF-8") == 0) {
--    const std::vector<w_char>& vec_wordchars_utf16 = pMS->get_wordchars_utf16();
-+    const wide::string& vec_wordchars_utf16 = pMS->get_wordchars_utf16();
-     wordchars_utf16 = &vec_wordchars_utf16[0];
-     wordchars_utf16_len = vec_wordchars_utf16.size();
-     io_utf8 = 1;
-@@ -1199,7 +1199,7 @@ void dialogscreen(TextParser* parser,
- std::string lower_first_char(const std::string& token, const char* ioenc, int langnum) {
-   std::string utf8str(token);
-   chenc(utf8str, ioenc, "UTF-8");
--  std::vector<w_char> u;
-+  wide::string u;
-   u8_u16(u, utf8str);
-   if (!u.empty()) {
-     unsigned short idx = (u[0].h << 8) + u[0].l;
--- 
-2.9.3
-
diff --git a/external/hunspell/0003-Related-hunspell-406-use-a-basic_string-w_char-inste.patch b/external/hunspell/0003-Related-hunspell-406-use-a-basic_string-w_char-inste.patch
deleted file mode 100644
index ba48f9f..0000000
--- a/external/hunspell/0003-Related-hunspell-406-use-a-basic_string-w_char-inste.patch
+++ /dev/null
@@ -1,37 +0,0 @@
-From 7c7f56e1c6fe510a2c5e826cc49aeae3f6614f86 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm at redhat.com>
-Date: Mon, 23 Jan 2017 13:36:20 +0000
-Subject: [PATCH 3/3] Related: hunspell#406 use a basic_string<w_char> instead
- of vector
-
-kcachegrind reports 1,066,887,723 -> 894,015,631 on
-
-echo Hollo | valgrind --tool=callgrind ./src/tools/.libs/hunspell -d nl_NL
----
- src/hunspell/w_char.hxx | 3 +--
- 1 file changed, 1 insertion(+), 2 deletions(-)
-
-diff --git a/src/hunspell/w_char.hxx b/src/hunspell/w_char.hxx
-index 84ae13c..e112b5c 100644
---- a/src/hunspell/w_char.hxx
-+++ b/src/hunspell/w_char.hxx
-@@ -42,7 +42,6 @@
- #define W_CHAR_HXX_
- 
- #include <string>
--#include <vector>
- 
- #ifndef GCC
- struct w_char {
-@@ -75,7 +74,7 @@ struct replentry {
- 
- namespace wide
- {
--    typedef std::vector<w_char> string;
-+    typedef std::basic_string<w_char> string;
- }
- 
- #endif
--- 
-2.9.3
-
diff --git a/external/hunspell/0003-hoist-string-lowering-from-ngram-to-ngsuggest.patch b/external/hunspell/0003-hoist-string-lowering-from-ngram-to-ngsuggest.patch
new file mode 100644
index 0000000..ff2530c
--- /dev/null
+++ b/external/hunspell/0003-hoist-string-lowering-from-ngram-to-ngsuggest.patch
@@ -0,0 +1,264 @@
+From cf0967951a25a2daa10a636092193af5c5497aa2 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm at redhat.com>
+Date: Fri, 10 Feb 2017 16:36:27 +0000
+Subject: [PATCH 3/4] hoist string lowering from ngram to ngsuggest
+
+only lower when we have to and reuse scratch buffers as
+tolower destination
+
+kcachegrind reports 830,529,143 -> 779,887,690 on
+
+echo Hollo | valgrind --tool=callgrind ./src/tools/.libs/hunspell -d nl_NL
+---
+ src/hunspell/suggestmgr.cxx | 143 +++++++++++++++++++++++++++++---------------
+ 1 file changed, 95 insertions(+), 48 deletions(-)
+
+diff --git a/src/hunspell/suggestmgr.cxx b/src/hunspell/suggestmgr.cxx
+index 54a474f..ea52707 100644
+--- a/src/hunspell/suggestmgr.cxx
++++ b/src/hunspell/suggestmgr.cxx
+@@ -1075,10 +1075,8 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
+     u8_u16(w_target, target);
+   }
+   
+-  std::vector<w_char> w_entry;
+   std::string f;
+   std::vector<w_char> w_f;
+-  std::vector<w_char> w_target2;
+   
+   for (size_t i = 0; i < rHMgr.size(); ++i) {
+     while (0 != (hp = rHMgr[i]->walk_hashtable(col, hp))) {
+@@ -1091,13 +1089,24 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
+         continue;
+ 
+       if (utf8) {
+-        w_entry.clear();
+-        u8_u16(w_entry, HENTRY_WORD(hp));
+-        sc = ngram(3, w_word, w_entry, NGRAM_LONGER_WORSE + low) +
+-             leftcommonsubstring(w_word, w_entry);
++        w_f.clear();
++        u8_u16(w_f, HENTRY_WORD(hp));
++
++        int leftcommon = leftcommonsubstring(w_word, w_f);
++        if (low) {
++          // lowering dictionary word
++          mkallsmall_utf(w_f, langnum);
++        }
++        sc = ngram(3, w_word, w_f, NGRAM_LONGER_WORSE) + leftcommon;
+       } else {
+-        sc = ngram(3, word, HENTRY_WORD(hp), NGRAM_LONGER_WORSE + low) +
+-             leftcommonsubstring(word, HENTRY_WORD(hp));
++        f.assign(HENTRY_WORD(hp));
++
++        int leftcommon = leftcommonsubstring(word, f.c_str());
++        if (low) {
++          // lowering dictionary word
++          mkallsmall(f, csconv);
++        }
++        sc = ngram(3, word, f, NGRAM_LONGER_WORSE) + leftcommon;
+       }
+ 
+       // check special pronounciation
+@@ -1108,11 +1117,20 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
+         if (utf8) {
+           w_f.clear();
+           u8_u16(w_f, f);
+-          sc2 = ngram(3, w_word, w_f, NGRAM_LONGER_WORSE + low) +
+-                leftcommonsubstring(w_word, w_f);
++
++          int leftcommon = leftcommonsubstring(w_word, w_f);
++          if (low) {
++            // lowering dictionary word
++            mkallsmall_utf(w_f, langnum);
++          }
++          sc2 = ngram(3, w_word, w_f, NGRAM_LONGER_WORSE) + leftcommon;
+         } else {
+-          sc2 = ngram(3, word, f, NGRAM_LONGER_WORSE + low) +
+-                leftcommonsubstring(word, f.c_str());
++          int leftcommon = leftcommonsubstring(word, f.c_str());
++          if (low) {
++            // lowering dictionary word
++            mkallsmall(f, csconv);
++          }
++          sc2 = ngram(3, word, f, NGRAM_LONGER_WORSE) + leftcommon;
+         }
+         if (sc2 > sc)
+           sc = sc2;
+@@ -1129,14 +1147,14 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
+           candidate = HENTRY_WORD(hp);
+           mkallcap(candidate, csconv);
+         }
+-        std::string target2 = phonet(candidate, *ph);
+-        w_target2.clear();
++        f = phonet(candidate, *ph);
++        w_f.clear();
+         if (utf8) {
+-          u8_u16(w_target2, target2);
+-          scphon = 2 * ngram(3, w_target, w_target2,
++          u8_u16(w_f, f);
++          scphon = 2 * ngram(3, w_target, w_f,
+                              NGRAM_LONGER_WORSE);
+         } else {
+-          scphon = 2 * ngram(3, target, target2,
++          scphon = 2 * ngram(3, target, f,
+                              NGRAM_LONGER_WORSE);
+         }
+       }
+@@ -1177,12 +1195,24 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
+         w_mw[k].l = '*';
+         w_mw[k].h = 0;
+       }
+-      thresh += ngram(n, w_word, w_mw, NGRAM_ANY_MISMATCH + low);
++
++      if (low) {
++        // lowering dictionary word
++        mkallsmall_utf(w_mw, langnum);
++      }
++
++      thresh += ngram(n, w_word, w_mw, NGRAM_ANY_MISMATCH);
+     } else {
+       std::string mw = word;
+       for (int k = sp; k < n; k += 4)
+         mw[k] = '*';
+-      thresh += ngram(n, word, mw, NGRAM_ANY_MISMATCH + low);
++
++      if (low) {
++        // lowering dictionary word
++        mkallsmall(mw, csconv);
++      }
++
++      thresh += ngram(n, word, mw, NGRAM_ANY_MISMATCH);
+     }
+   }
+   thresh = thresh / 3;
+@@ -1210,7 +1240,6 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
+     return;
+   }
+ 
+-  std::vector<w_char> w_glst_word;
+   for (int i = 0; i < MAX_ROOTS; i++) {
+     if (roots[i]) {
+       struct hentry* rp = roots[i];
+@@ -1225,15 +1254,26 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
+ 
+       for (int k = 0; k < nw; k++) {
+         if (utf8) {
+-          w_glst_word.clear();
+-          u8_u16(w_glst_word, glst[k].word);
+-          sc = ngram(n, w_word, w_glst_word,
+-                     NGRAM_ANY_MISMATCH + low) +
+-               leftcommonsubstring(w_word, w_glst_word);
++          w_f.clear();
++          u8_u16(w_f, glst[k].word);
++
++          int leftcommon = leftcommonsubstring(w_word, w_f);
++          if (low) {
++            // lowering dictionary word
++            mkallsmall_utf(w_f, langnum);
++          }
++
++          sc = ngram(n, w_word, w_f, NGRAM_ANY_MISMATCH) + leftcommon;
+         } else {
+-          sc = ngram(n, word, glst[k].word,
+-                     NGRAM_ANY_MISMATCH + low) +
+-               leftcommonsubstring(word, glst[k].word);
++          f = glst[k].word;
++
++          int leftcommon = leftcommonsubstring(word, f.c_str());
++          if (low) {
++            // lowering dictionary word
++            mkallsmall(f, csconv);
++          }
++
++          sc = ngram(n, word, f, NGRAM_ANY_MISMATCH) + leftcommon;
+         }
+ 
+         if (sc > thresh) {
+@@ -1318,19 +1358,37 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
+       w_gl.clear();
+       if (utf8) {
+         u8_u16(w_gl, gl);
+-        re = ngram(2, w_word, w_gl, NGRAM_ANY_MISMATCH + low + NGRAM_WEIGHTED) +
+-             ngram(2, w_gl, w_word, NGRAM_ANY_MISMATCH + low + NGRAM_WEIGHTED);
++        //w_gl is lowercase already at this point
++        re = ngram(2, w_word, w_gl, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED);
++        if (low) {
++          w_f = w_word;
++          // lowering dictionary word
++          mkallsmall_utf(w_f, langnum);
++          re += ngram(2, w_gl, w_f, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED);
++        } else {
++          re += ngram(2, w_gl, w_word, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED);
++        }
+       } else {
+-        re = ngram(2, word, gl, NGRAM_ANY_MISMATCH + low + NGRAM_WEIGHTED) +
+-             ngram(2, gl, word, NGRAM_ANY_MISMATCH + low + NGRAM_WEIGHTED);
++        //gl is lowercase already at this point
++        re = ngram(2, word, gl, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED);
++        if (low) {
++          f = word;
++          // lowering dictionary word
++          mkallsmall(f, csconv);
++          re += ngram(2, gl, f, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED);
++        } else {
++          re += ngram(2, gl, word, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED);
++        }
+       }
+ 
+       int ngram_score, leftcommon_score;
+       if (utf8) {
+-        ngram_score = ngram(4, w_word, w_gl, NGRAM_ANY_MISMATCH + low);
++        //w_gl is lowercase already at this point
++        ngram_score = ngram(4, w_word, w_gl, NGRAM_ANY_MISMATCH);
+         leftcommon_score = leftcommonsubstring(w_word, w_gl);
+       } else {
+-        ngram_score = ngram(4, word, gl, NGRAM_ANY_MISMATCH + low);
++        //gl is lowercase already at this point
++        ngram_score = ngram(4, word, gl, NGRAM_ANY_MISMATCH);
+         leftcommon_score = leftcommonsubstring(word, gl.c_str());
+       }
+       gscore[i] =
+@@ -1802,14 +1860,6 @@ int SuggestMgr::ngram(int n,
+   l2 = su2.size();
+   if (l2 == 0)
+     return 0;
+-  // lowering dictionary word
+-  const std::vector<w_char>* p_su2 = &su2;
+-  std::vector<w_char> su2_copy;
+-  if (opt & NGRAM_LOWERING) {
+-    su2_copy = su2;
+-    mkallsmall_utf(su2_copy, langnum);
+-    p_su2 = &su2_copy;
+-  }
+   for (int j = 1; j <= n; j++) {
+     ns = 0;
+     for (int i = 0; i <= (l1 - j); i++) {
+@@ -1817,7 +1867,7 @@ int SuggestMgr::ngram(int n,
+       for (int l = 0; l <= (l2 - j); l++) {
+         for (k = 0; k < j; k++) {
+           const w_char& c1 = su1[i + k];
+-          const w_char& c2 = (*p_su2)[l + k];
++          const w_char& c2 = su2[l + k];
+           if ((c1.l != c2.l) || (c1.h != c2.h))
+             break;
+         }
+@@ -1862,14 +1912,11 @@ int SuggestMgr::ngram(int n,
+   if (l2 == 0)
+     return 0;
+   l1 = s1.size();
+-  std::string t(s2);
+-  if (opt & NGRAM_LOWERING)
+-    mkallsmall(t, csconv);
+   for (int j = 1; j <= n; j++) {
+     ns = 0;
+     for (int i = 0; i <= (l1 - j); i++) {
+-      //t is haystack, s1[i..i+j) is needle
+-      if (t.find(s1.c_str()+i, 0, j) != std::string::npos) {
++      //s2 is haystack, s1[i..i+j) is needle
++      if (s2.find(s1.c_str()+i, 0, j) != std::string::npos) {
+         ns++;
+       } else if (opt & NGRAM_WEIGHTED) {
+         ns--;
+-- 
+2.9.3
+
diff --git a/external/hunspell/0004-either-clear-will-be-called-anyway-before-use-or-its.patch b/external/hunspell/0004-either-clear-will-be-called-anyway-before-use-or-its.patch
new file mode 100644
index 0000000..6c8a108
--- /dev/null
+++ b/external/hunspell/0004-either-clear-will-be-called-anyway-before-use-or-its.patch
@@ -0,0 +1,81 @@
+From aab258adbd9c78931a36b96e58975a08000249a8 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm at redhat.com>
+Date: Fri, 10 Feb 2017 17:14:35 +0000
+Subject: [PATCH 4/4] either clear will be called anyway before use, or its
+ unused afterwards
+
+---
+ src/hunspell/suggestmgr.cxx | 8 --------
+ 1 file changed, 8 deletions(-)
+
+diff --git a/src/hunspell/suggestmgr.cxx b/src/hunspell/suggestmgr.cxx
+index ea52707..ae34535 100644
+--- a/src/hunspell/suggestmgr.cxx
++++ b/src/hunspell/suggestmgr.cxx
+@@ -1089,7 +1089,6 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
+         continue;
+ 
+       if (utf8) {
+-        w_f.clear();
+         u8_u16(w_f, HENTRY_WORD(hp));
+ 
+         int leftcommon = leftcommonsubstring(w_word, w_f);
+@@ -1115,7 +1114,6 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
+           copy_field(f, HENTRY_DATA(hp), MORPH_PHON)) {
+         int sc2;
+         if (utf8) {
+-          w_f.clear();
+           u8_u16(w_f, f);
+ 
+           int leftcommon = leftcommonsubstring(w_word, w_f);
+@@ -1139,7 +1137,6 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
+       int scphon = -20000;
+       if (ph && (sc > 2) && (abs(n - (int)hp->clen) <= 3)) {
+         if (utf8) {
+-          w_candidate.clear();
+           u8_u16(w_candidate, HENTRY_WORD(hp));
+           mkallcap_utf(w_candidate, langnum);
+           u16_u8(candidate, w_candidate);
+@@ -1148,7 +1145,6 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
+           mkallcap(candidate, csconv);
+         }
+         f = phonet(candidate, *ph);
+-        w_f.clear();
+         if (utf8) {
+           u8_u16(w_f, f);
+           scphon = 2 * ngram(3, w_target, w_f,
+@@ -1254,7 +1250,6 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
+ 
+       for (int k = 0; k < nw; k++) {
+         if (utf8) {
+-          w_f.clear();
+           u8_u16(w_f, glst[k].word);
+ 
+           int leftcommon = leftcommonsubstring(w_word, w_f);
+@@ -1335,7 +1330,6 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
+       std::string gl;
+       int len;
+       if (utf8) {
+-        w_gl.clear();
+         len = u8_u16(w_gl, guess[i]);
+         mkallsmall_utf(w_gl, langnum);
+         u16_u8(gl, w_gl);
+@@ -1355,7 +1349,6 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
+       }
+       // using 2-gram instead of 3, and other weightening
+ 
+-      w_gl.clear();
+       if (utf8) {
+         u8_u16(w_gl, gl);
+         //w_gl is lowercase already at this point
+@@ -1421,7 +1414,6 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
+         // lowering rootphon[i]
+         std::string gl;
+         int len;
+-        w_gl.clear();
+         if (utf8) {
+           len = u8_u16(w_gl, rootsphon[i]);
+           mkallsmall_utf(w_gl, langnum);
+-- 
+2.9.3
+
diff --git a/external/hunspell/0004-use-a-per-hashmgr-persistent-wide-string-scratch-buf.patch b/external/hunspell/0004-use-a-per-hashmgr-persistent-wide-string-scratch-buf.patch
deleted file mode 100644
index 1d896c6..0000000
--- a/external/hunspell/0004-use-a-per-hashmgr-persistent-wide-string-scratch-buf.patch
+++ /dev/null
@@ -1,117 +0,0 @@
-From 1393bd64581d6010a65d368e1031641391bdb154 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm at redhat.com>
-Date: Mon, 23 Jan 2017 14:30:13 +0000
-Subject: [PATCH 1/2] use a per-hashmgr persistent wide string scratch buffer
-
-kcachegrind reports 894,015,631 -> 845,183,693
----
- src/hunspell/hashmgr.cxx | 40 ++++++++++++++++++----------------------
- src/hunspell/hashmgr.hxx |  1 +
- 2 files changed, 19 insertions(+), 22 deletions(-)
-
-diff --git a/src/hunspell/hashmgr.cxx b/src/hunspell/hashmgr.cxx
-index 6d92e9b..8d6189b 100644
---- a/src/hunspell/hashmgr.cxx
-+++ b/src/hunspell/hashmgr.cxx
-@@ -345,11 +345,10 @@ int HashMgr::add_hidden_capitalized_word(const std::string& word,
-     flags2[flagslen] = ONLYUPCASEFLAG;
-     if (utf8) {
-       std::string st;
--      wide::string w;
--      u8_u16(w, word);
--      mkallsmall_utf(w, langnum);
--      mkinitcap_utf(w, langnum);
--      u16_u8(st, w);
-+      u8_u16(workbuf, word);
-+      mkallsmall_utf(workbuf, langnum);
-+      mkinitcap_utf(workbuf, langnum);
-+      u16_u8(st, workbuf);
-       return add_word(st, wcl, flags2, flagslen + 1, dp, true);
-     } else {
-       std::string new_word(word);
-@@ -366,9 +365,8 @@ int HashMgr::add_hidden_capitalized_word(const std::string& word,
- int HashMgr::get_clen_and_captype(const std::string& word, int* captype) {
-   int len;
-   if (utf8) {
--    wide::string dest_utf;
--    len = u8_u16(dest_utf, word);
--    *captype = get_captype_utf8(dest_utf, langnum);
-+    len = u8_u16(workbuf, word);
-+    *captype = get_captype_utf8(workbuf, langnum);
-   } else {
-     len = word.size();
-     *captype = get_captype(word, csconv);
-@@ -688,13 +686,12 @@ int HashMgr::decode_flags(unsigned short** result, const std::string& flags, Fil
-       break;
-     }
-     case FLAG_UNI: {  // UTF-8 characters
--      wide::string w;
--      u8_u16(w, flags);
--      len = w.size();
-+      u8_u16(workbuf, flags);
-+      len = workbuf.size();
-       *result = (unsigned short*)malloc(len * sizeof(unsigned short));
-       if (!*result)
-         return -1;
--      memcpy(*result, &w[0], len * sizeof(short));
-+      memcpy(*result, &workbuf[0], len * sizeof(short));
-       break;
-     }
-     default: {  // Ispell's one-character flags (erfg -> e r f g)
-@@ -760,12 +757,11 @@ bool HashMgr::decode_flags(std::vector<unsigned short>& result, const std::strin
-       break;
-     }
-     case FLAG_UNI: {  // UTF-8 characters
--      wide::string w;
--      u8_u16(w, flags);
--      size_t len = w.size();
-+      u8_u16(workbuf, flags);
-+      size_t len = workbuf.size();
-       size_t origsize = result.size();
-       result.resize(origsize + len);
--      memcpy(&result[origsize], &w[0], len * sizeof(short));
-+      memcpy(&result[origsize], &workbuf[0], len * sizeof(short));
-       break;
-     }
-     default: {  // Ispell's one-character flags (erfg -> e r f g)
-@@ -793,10 +789,9 @@ unsigned short HashMgr::decode_flag(const char* f) const {
-       s = (unsigned short)i;
-       break;
-     case FLAG_UNI: {
--      wide::string w;
--      u8_u16(w, f);
--      if (!w.empty())
--          memcpy(&s, &w[0], 1 * sizeof(short));
-+      u8_u16(workbuf, f);
-+      if (!workbuf.empty())
-+          memcpy(&s, &workbuf[0], 1 * sizeof(short));
-       break;
-     }
-     default:
-@@ -820,8 +815,9 @@ char* HashMgr::encode_flag(unsigned short f) const {
-     ch = stream.str();
-   } else if (flag_mode == FLAG_UNI) {
-     const w_char* w_c = (const w_char*)&f;
--    wide::string w(w_c, w_c + 1);
--    u16_u8(ch, w);
-+    workbuf.clear();
-+    workbuf.push_back(*w_c);
-+    u16_u8(ch, workbuf);
-   } else {
-     ch.push_back((unsigned char)(f));
-   }
-diff --git a/src/hunspell/hashmgr.hxx b/src/hunspell/hashmgr.hxx
-index 312c8ba..78ffb44 100644
---- a/src/hunspell/hashmgr.hxx
-+++ b/src/hunspell/hashmgr.hxx
-@@ -97,6 +97,7 @@ class HashMgr {
-   struct cs_info* csconv;
-   std::string ignorechars;
-   wide::string ignorechars_utf16;
-+  mutable wide::string workbuf;
-   int numaliasf;  // flag vector `compression' with aliases
-   unsigned short** aliasf;
-   unsigned short* aliasflen;
--- 
-2.9.3
-
diff --git a/external/hunspell/0005-use-a-per-hashmgr-persistent-wide-string-scratch-buf.patch b/external/hunspell/0005-use-a-per-hashmgr-persistent-wide-string-scratch-buf.patch
deleted file mode 100644
index a892f74..0000000
--- a/external/hunspell/0005-use-a-per-hashmgr-persistent-wide-string-scratch-buf.patch
+++ /dev/null
@@ -1,168 +0,0 @@
-From 5c7bfa8d36b87a0649f6f88b20624c38a3a5f0ae Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm at redhat.com>
-Date: Mon, 23 Jan 2017 14:43:19 +0000
-Subject: [PATCH 2/2] use a per-hashmgr persistent wide string scratch buffer
-
-kcachegrind reports 845,183,693 -> 812,760,392
----
- src/hunspell/suggestmgr.cxx | 55 ++++++++++++++++++++-------------------------
- src/hunspell/suggestmgr.hxx |  3 +++
- 2 files changed, 27 insertions(+), 31 deletions(-)
-
-diff --git a/src/hunspell/suggestmgr.cxx b/src/hunspell/suggestmgr.cxx
-index 1deec96..f5ea01b 100644
---- a/src/hunspell/suggestmgr.cxx
-+++ b/src/hunspell/suggestmgr.cxx
-@@ -491,10 +491,11 @@ int SuggestMgr::doubletwochars_utf(std::vector<std::string>& wlst,
-     if (word[i] == word[i - 2]) {
-       state++;
-       if (state == 3) {
--        wide::string candidate_utf(word, word + i - 1);
--        candidate_utf.insert(candidate_utf.end(), word + i + 1, word + wl);
-+        workbuf1.clear();
-+        workbuf1.insert(workbuf1.end(), word, word + i - 1);
-+        workbuf1.insert(workbuf1.end(), word + i + 1, word + wl);
-         std::string candidate;
--        u16_u8(candidate, candidate_utf);
-+        u16_u8(candidate, workbuf1);
-         testsug(wlst, candidate, cpdsuggest, NULL, NULL);
-         state = 0;
-       }
-@@ -1050,12 +1051,11 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
-   phonetable* ph = (pAMgr) ? pAMgr->get_phonetable() : NULL;
-   std::string target;
-   std::string candidate;
--  wide::string w_candidate;
-   if (ph) {
-     if (utf8) {
--      u8_u16(w_candidate, word);
--      mkallcap_utf(w_candidate, langnum);
--      u16_u8(candidate, w_candidate);
-+      u8_u16(workbuf1, word);
-+      mkallcap_utf(workbuf1, langnum);
-+      u16_u8(candidate, workbuf1);
-     } else {
-       candidate.assign(word);
-       if (!nonbmp)
-@@ -1121,10 +1121,9 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
-       int scphon = -20000;
-       if (ph && (sc > 2) && (abs(n - (int)hp->clen) <= 3)) {
-         if (utf8) {
--          w_candidate.clear();
--          u8_u16(w_candidate, HENTRY_WORD(hp));
--          mkallcap_utf(w_candidate, langnum);
--          u16_u8(candidate, w_candidate);
-+          u8_u16(workbuf1, HENTRY_WORD(hp));
-+          mkallcap_utf(workbuf1, langnum);
-+          u16_u8(candidate, workbuf1);
-         } else {
-           candidate = HENTRY_WORD(hp);
-           mkallcap(candidate, csconv);
-@@ -1804,11 +1803,10 @@ int SuggestMgr::ngram(int n,
-     return 0;
-   // lowering dictionary word
-   const wide::string* p_su2 = &su2;
--  wide::string su2_copy;
-   if (opt & NGRAM_LOWERING) {
--    su2_copy = su2;
--    mkallsmall_utf(su2_copy, langnum);
--    p_su2 = &su2_copy;
-+    workbuf1 = su2;
-+    mkallsmall_utf(workbuf1, langnum);
-+    p_su2 = &workbuf1;
-   }
-   for (int j = 1; j <= n; j++) {
-     ns = 0;
-@@ -1948,22 +1946,20 @@ int SuggestMgr::commoncharacterpositions(const char* s1,
-   int diffpos[2];
-   *is_swap = 0;
-   if (utf8) {
--    wide::string su1;
--    wide::string su2;
--    int l1 = u8_u16(su1, s1);
--    int l2 = u8_u16(su2, s2);
-+    int l1 = u8_u16(workbuf1, s1);
-+    int l2 = u8_u16(workbuf2, s2);
- 
-     if (l1 <= 0 || l2 <= 0)
-       return 0;
- 
-     // decapitalize dictionary word
-     if (complexprefixes) {
--      su2[l2 - 1] = lower_utf(su2[l2 - 1], langnum);
-+      workbuf2[l2 - 1] = lower_utf(workbuf2[l2 - 1], langnum);
-     } else {
--      su2[0] = lower_utf(su2[0], langnum);
-+      workbuf2[0] = lower_utf(workbuf2[0], langnum);
-     }
-     for (int i = 0; (i < l1) && (i < l2); i++) {
--      if (su1[i] == su2[i]) {
-+      if (workbuf1[i] == workbuf2[i]) {
-         num++;
-       } else {
-         if (diff < 2)
-@@ -1972,8 +1968,8 @@ int SuggestMgr::commoncharacterpositions(const char* s1,
-       }
-     }
-     if ((diff == 2) && (l1 == l2) &&
--        (su1[diffpos[0]] == su2[diffpos[1]]) &&
--        (su1[diffpos[1]] == su2[diffpos[0]]))
-+        (workbuf1[diffpos[0]] == workbuf2[diffpos[1]]) &&
-+        (workbuf1[diffpos[1]] == workbuf2[diffpos[0]]))
-       *is_swap = 1;
-   } else {
-     size_t i;
-@@ -2004,8 +2000,7 @@ int SuggestMgr::commoncharacterpositions(const char* s1,
- 
- int SuggestMgr::mystrlen(const char* word) {
-   if (utf8) {
--    wide::string w;
--    return u8_u16(w, word);
-+    return u8_u16(workbuf1, word);
-   } else
-     return strlen(word);
- }
-@@ -2044,15 +2039,13 @@ void SuggestMgr::lcs(const char* s,
-                      int* l2,
-                      char** result) {
-   int n, m;
--  wide::string su;
--  wide::string su2;
-   char* b;
-   char* c;
-   int i;
-   int j;
-   if (utf8) {
--    m = u8_u16(su, s);
--    n = u8_u16(su2, s2);
-+    m = u8_u16(workbuf1, s);
-+    n = u8_u16(workbuf2, s2);
-   } else {
-     m = strlen(s);
-     n = strlen(s2);
-@@ -2073,7 +2066,7 @@ void SuggestMgr::lcs(const char* s,
-     c[j] = 0;
-   for (i = 1; i <= m; i++) {
-     for (j = 1; j <= n; j++) {
--      if (((utf8) && (su[i - 1] == su2[j - 1])) ||
-+      if (((utf8) && (workbuf1[i - 1] == workbuf2[j - 1])) ||
-           ((!utf8) && (s[i - 1] == s2[j - 1]))) {
-         c[i * (n + 1) + j] = c[(i - 1) * (n + 1) + j - 1] + 1;
-         b[i * (n + 1) + j] = LCS_UPLEFT;
-diff --git a/src/hunspell/suggestmgr.hxx b/src/hunspell/suggestmgr.hxx
-index 9bfa933..80b0fd7 100644
---- a/src/hunspell/suggestmgr.hxx
-+++ b/src/hunspell/suggestmgr.hxx
-@@ -105,6 +105,9 @@ class SuggestMgr {
-   SuggestMgr& operator=(const SuggestMgr&);
- 
-  private:
-+  mutable wide::string workbuf1;
-+  mutable wide::string workbuf2;
-+
-   char* ckey;
-   size_t ckeyl;
-   wide::string ckey_utf;
--- 
-2.9.3
-
diff --git a/external/hunspell/UnpackedTarball_hunspell.mk b/external/hunspell/UnpackedTarball_hunspell.mk
index e493366..b30bd08 100644
--- a/external/hunspell/UnpackedTarball_hunspell.mk
+++ b/external/hunspell/UnpackedTarball_hunspell.mk
@@ -22,10 +22,12 @@ $(eval $(call gb_UnpackedTarball_set_patchlevel,hunspell,1))
 $(eval $(call gb_UnpackedTarball_add_patches,hunspell, \
 	external/hunspell/0001-Revert-Remove-autotools-autogenerated-files.patch \
 	external/hunspell/0001-unroll-this-a-bit.patch \
-	external/hunspell/0002-rename-std-vector-w_char-to-wide-string.patch \
-	external/hunspell/0003-Related-hunspell-406-use-a-basic_string-w_char-inste.patch \
-	external/hunspell/0004-use-a-per-hashmgr-persistent-wide-string-scratch-buf.patch \
-	external/hunspell/0005-use-a-per-hashmgr-persistent-wide-string-scratch-buf.patch \
+	external/hunspell/0001-cppcheck-redundant-c_str.patch \
+	external/hunspell/0001-cppcheck-rv-is-reassigned-before-old-value-used.patch \
+	external/hunspell/0001-loop-via-iterators.patch \
+	external/hunspell/0002-add-a-get_clen_and_captype-varient-that-takes-a-buff.patch \
+	external/hunspell/0003-hoist-string-lowering-from-ngram-to-ngsuggest.patch \
+	external/hunspell/0004-either-clear-will-be-called-anyway-before-use-or-its.patch \
 ))
 
 # vim: set noet sw=4 ts=4:


More information about the Libreoffice-commits mailing list