[Libreoffice-commits] core.git: external/hunspell
Caolán McNamara
caolanm at redhat.com
Sun Feb 12 21:18:24 UTC 2017
external/hunspell/0001-cppcheck-redundant-c_str.patch | 34
external/hunspell/0001-cppcheck-rv-is-reassigned-before-old-value-used.patch | 57
external/hunspell/0001-loop-via-iterators.patch | 36
external/hunspell/0002-add-a-get_clen_and_captype-varient-that-takes-a-buff.patch | 78
external/hunspell/0002-rename-std-vector-w_char-to-wide-string.patch | 912 ----------
external/hunspell/0003-Related-hunspell-406-use-a-basic_string-w_char-inste.patch | 37
external/hunspell/0003-hoist-string-lowering-from-ngram-to-ngsuggest.patch | 264 ++
external/hunspell/0004-either-clear-will-be-called-anyway-before-use-or-its.patch | 81
external/hunspell/0004-use-a-per-hashmgr-persistent-wide-string-scratch-buf.patch | 117 -
external/hunspell/0005-use-a-per-hashmgr-persistent-wide-string-scratch-buf.patch | 168 -
external/hunspell/UnpackedTarball_hunspell.mk | 10
11 files changed, 556 insertions(+), 1238 deletions(-)
New commits:
commit 163435fa23fbfc237a7718c9d440a98847e4f626
Author: Caolán McNamara <caolanm at redhat.com>
Date: Sun Feb 12 17:20:56 2017 +0000
use alternative optimizations for buffer creation bottleneck
Change-Id: I9f29e8d3e5e97fe403a3e0d7d03c6ac01c7689c4
diff --git a/external/hunspell/0001-cppcheck-redundant-c_str.patch b/external/hunspell/0001-cppcheck-redundant-c_str.patch
new file mode 100644
index 0000000..276ddd2
--- /dev/null
+++ b/external/hunspell/0001-cppcheck-redundant-c_str.patch
@@ -0,0 +1,34 @@
+From 9a0baf202f67291eaf482f1bcf654e21d71943e2 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm at redhat.com>
+Date: Mon, 23 Jan 2017 11:43:53 +0000
+Subject: [PATCH] cppcheck: redundant c_str
+
+---
+ src/hunspell/suggestmgr.cxx | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/src/hunspell/suggestmgr.cxx b/src/hunspell/suggestmgr.cxx
+index b998341..8d46dd6 100644
+--- a/src/hunspell/suggestmgr.cxx
++++ b/src/hunspell/suggestmgr.cxx
+@@ -1107,7 +1107,7 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
+ int sc2;
+ if (utf8) {
+ w_f.clear();
+- u8_u16(w_f, f.c_str());
++ u8_u16(w_f, f);
+ sc2 = ngram(3, w_word, w_f, NGRAM_LONGER_WORSE + low) +
+ leftcommonsubstring(w_word, w_f);
+ } else {
+@@ -1132,7 +1132,7 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
+ std::string target2 = phonet(candidate, *ph);
+ w_target2.clear();
+ if (utf8) {
+- u8_u16(w_target2, target2.c_str());
++ u8_u16(w_target2, target2);
+ scphon = 2 * ngram(3, w_target, w_target2,
+ NGRAM_LONGER_WORSE);
+ } else {
+--
+2.9.3
+
diff --git a/external/hunspell/0001-cppcheck-rv-is-reassigned-before-old-value-used.patch b/external/hunspell/0001-cppcheck-rv-is-reassigned-before-old-value-used.patch
new file mode 100644
index 0000000..bfcdf49
--- /dev/null
+++ b/external/hunspell/0001-cppcheck-rv-is-reassigned-before-old-value-used.patch
@@ -0,0 +1,57 @@
+From 93156ba9a8e644f8b0b724880668714adcb0d094 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm at redhat.com>
+Date: Mon, 23 Jan 2017 12:05:07 +0000
+Subject: [PATCH] cppcheck: rv is reassigned before old value used
+
+---
+ src/hunspell/affixmgr.cxx | 6 ++----
+ src/hunspell/suggestmgr.cxx | 3 +--
+ 2 files changed, 3 insertions(+), 6 deletions(-)
+
+diff --git a/src/hunspell/affixmgr.cxx b/src/hunspell/affixmgr.cxx
+index 680cbe9..21cf384 100644
+--- a/src/hunspell/affixmgr.cxx
++++ b/src/hunspell/affixmgr.cxx
+@@ -1494,9 +1494,8 @@ int AffixMgr::defcpd_check(hentry*** words,
+ }
+
+ inline int AffixMgr::candidate_check(const char* word, int len) {
+- struct hentry* rv = NULL;
+
+- rv = lookup(word);
++ struct hentry* rv = lookup(word);
+ if (rv)
+ return 1;
+
+@@ -3045,10 +3044,9 @@ struct hentry* AffixMgr::affix_check(const char* word,
+ int len,
+ const FLAG needflag,
+ char in_compound) {
+- struct hentry* rv = NULL;
+
+ // check all prefixes (also crossed with suffixes if allowed)
+- rv = prefix_check(word, len, in_compound, needflag);
++ struct hentry* rv = prefix_check(word, len, in_compound, needflag);
+ if (rv)
+ return rv;
+
+diff --git a/src/hunspell/suggestmgr.cxx b/src/hunspell/suggestmgr.cxx
+index 8d46dd6..54a474f 100644
+--- a/src/hunspell/suggestmgr.cxx
++++ b/src/hunspell/suggestmgr.cxx
+@@ -1675,11 +1675,10 @@ std::string SuggestMgr::suggest_hentry_gen(hentry* rv, const char* pattern) {
+ if (HENTRY_DATA(rv))
+ p = (char*)strstr(HENTRY_DATA2(rv), MORPH_ALLOMORPH);
+ while (p) {
+- struct hentry* rv2 = NULL;
+ p += MORPH_TAG_LEN;
+ int plen = fieldlen(p);
+ std::string allomorph(p, plen);
+- rv2 = pAMgr->lookup(allomorph.c_str());
++ struct hentry* rv2 = pAMgr->lookup(allomorph.c_str());
+ while (rv2) {
+ // if (HENTRY_DATA(rv2) && get_sfxcount(HENTRY_DATA(rv2)) <=
+ // sfxcount) {
+--
+2.9.3
+
diff --git a/external/hunspell/0001-loop-via-iterators.patch b/external/hunspell/0001-loop-via-iterators.patch
new file mode 100644
index 0000000..6ecdd76
--- /dev/null
+++ b/external/hunspell/0001-loop-via-iterators.patch
@@ -0,0 +1,36 @@
+From f366e97fa8d7ad21060033b733dda15299edf7c5 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm at redhat.com>
+Date: Fri, 10 Feb 2017 15:37:11 +0000
+Subject: [PATCH 1/4] loop via iterators
+
+---
+ src/hunspell/csutil.cxx | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+diff --git a/src/hunspell/csutil.cxx b/src/hunspell/csutil.cxx
+index c1666a5..2408677 100644
+--- a/src/hunspell/csutil.cxx
++++ b/src/hunspell/csutil.cxx
+@@ -2537,13 +2537,17 @@ int get_captype_utf8(const std::vector<w_char>& word, int langnum) {
+ size_t ncap = 0;
+ size_t nneutral = 0;
+ size_t firstcap = 0;
+- for (size_t i = 0; i < word.size(); ++i) {
+- unsigned short idx = (word[i].h << 8) + word[i].l;
++
++ std::vector<w_char>::const_iterator it = word.begin();
++ std::vector<w_char>::const_iterator it_end = word.end();
++ while (it != it_end) {
++ unsigned short idx = (it->h << 8) + it->l;
+ unsigned short lwridx = unicodetolower(idx, langnum);
+ if (idx != lwridx)
+ ncap++;
+ if (unicodetoupper(idx, langnum) == lwridx)
+ nneutral++;
++ ++it;
+ }
+ if (ncap) {
+ unsigned short idx = (word[0].h << 8) + word[0].l;
+--
+2.9.3
+
diff --git a/external/hunspell/0002-add-a-get_clen_and_captype-varient-that-takes-a-buff.patch b/external/hunspell/0002-add-a-get_clen_and_captype-varient-that-takes-a-buff.patch
new file mode 100644
index 0000000..88695ec
--- /dev/null
+++ b/external/hunspell/0002-add-a-get_clen_and_captype-varient-that-takes-a-buff.patch
@@ -0,0 +1,78 @@
+From 8e957585671c76fa21e6265ec7b68aa19507f4fe Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm at redhat.com>
+Date: Fri, 10 Feb 2017 15:49:17 +0000
+Subject: [PATCH 2/4] add a get_clen_and_captype variant that takes a buffer
+
+kcachegrind reports 1,057,506,901 -> 830,529,143 on
+
+echo Hollo | valgrind --tool=callgrind ./src/tools/.libs/hunspell -d nl_NL
+---
+ src/hunspell/hashmgr.cxx | 16 +++++++++++-----
+ src/hunspell/hashmgr.hxx | 1 +
+ 2 files changed, 12 insertions(+), 5 deletions(-)
+
+diff --git a/src/hunspell/hashmgr.cxx b/src/hunspell/hashmgr.cxx
+index 1de1690..4844b49 100644
+--- a/src/hunspell/hashmgr.cxx
++++ b/src/hunspell/hashmgr.cxx
+@@ -363,12 +363,11 @@ int HashMgr::add_hidden_capitalized_word(const std::string& word,
+ }
+
+ // detect captype and modify word length for UTF-8 encoding
+-int HashMgr::get_clen_and_captype(const std::string& word, int* captype) {
++int HashMgr::get_clen_and_captype(const std::string& word, int* captype, std::vector<w_char> &workbuf) {
+ int len;
+ if (utf8) {
+- std::vector<w_char> dest_utf;
+- len = u8_u16(dest_utf, word);
+- *captype = get_captype_utf8(dest_utf, langnum);
++ len = u8_u16(workbuf, word);
++ *captype = get_captype_utf8(workbuf, langnum);
+ } else {
+ len = word.size();
+ *captype = get_captype(word, csconv);
+@@ -376,6 +375,11 @@ int HashMgr::get_clen_and_captype(const std::string& word, int* captype) {
+ return len;
+ }
+
++int HashMgr::get_clen_and_captype(const std::string& word, int* captype) {
++ std::vector<w_char> workbuf;
++ return get_clen_and_captype(word, captype, workbuf);
++}
++
+ // remove word (personal dictionary function for standalone applications)
+ int HashMgr::remove(const std::string& word) {
+ struct hentry* dp = lookup(word.c_str());
+@@ -527,6 +531,8 @@ int HashMgr::load_tables(const char* tpath, const char* key) {
+ // loop through all words on much list and add to hash
+ // table and create word and affix strings
+
++ std::vector<w_char> workbuf;
++
+ while (dict->getline(ts)) {
+ mychomp(ts);
+ // split each line into word and morphological description
+@@ -599,7 +605,7 @@ int HashMgr::load_tables(const char* tpath, const char* key) {
+ }
+
+ int captype;
+- int wcl = get_clen_and_captype(ts, &captype);
++ int wcl = get_clen_and_captype(ts, &captype, workbuf);
+ const std::string *dp_str = dp.empty() ? NULL : &dp;
+ // add the word and its index plus its capitalized form optionally
+ if (add_word(ts, wcl, flags, al, dp_str, false) ||
+diff --git a/src/hunspell/hashmgr.hxx b/src/hunspell/hashmgr.hxx
+index 812171a..5a09c45 100644
+--- a/src/hunspell/hashmgr.hxx
++++ b/src/hunspell/hashmgr.hxx
+@@ -125,6 +125,7 @@ class HashMgr {
+
+ private:
+ int get_clen_and_captype(const std::string& word, int* captype);
++ int get_clen_and_captype(const std::string& word, int* captype, std::vector<w_char> &workbuf);
+ int load_tables(const char* tpath, const char* key);
+ int add_word(const std::string& word,
+ int wcl,
+--
+2.9.3
+
diff --git a/external/hunspell/0002-rename-std-vector-w_char-to-wide-string.patch b/external/hunspell/0002-rename-std-vector-w_char-to-wide-string.patch
deleted file mode 100644
index 31b8c04..0000000
--- a/external/hunspell/0002-rename-std-vector-w_char-to-wide-string.patch
+++ /dev/null
@@ -1,912 +0,0 @@
-From 3a935abd0539143ee952d2f86ec513be6a056d5e Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm at redhat.com>
-Date: Mon, 23 Jan 2017 13:35:13 +0000
-Subject: [PATCH 2/3] rename std::vector<w_char> to wide::string
-
----
- src/hunspell/affixmgr.cxx | 8 ++---
- src/hunspell/affixmgr.hxx | 10 +++----
- src/hunspell/csutil.cxx | 28 +++++++++---------
- src/hunspell/csutil.hxx | 26 ++++++++--------
- src/hunspell/hashmgr.cxx | 12 ++++----
- src/hunspell/hashmgr.hxx | 2 +-
- src/hunspell/hunspell.cxx | 46 ++++++++++++++---------------
- src/hunspell/hunspell.hxx | 2 +-
- src/hunspell/suggestmgr.cxx | 72 ++++++++++++++++++++++-----------------------
- src/hunspell/suggestmgr.hxx | 12 ++++----
- src/hunspell/w_char.hxx | 6 ++++
- src/parsers/textparser.cxx | 2 +-
- src/tools/hunspell.cxx | 10 +++----
- 13 files changed, 121 insertions(+), 115 deletions(-)
-
-diff --git a/src/hunspell/affixmgr.cxx b/src/hunspell/affixmgr.cxx
-index 21cf384..4f64721 100644
---- a/src/hunspell/affixmgr.cxx
-+++ b/src/hunspell/affixmgr.cxx
-@@ -1338,7 +1338,7 @@ int AffixMgr::cpdcase_check(const char* word, int pos) {
- for (p = word + pos - 1; (*p & 0xc0) == 0x80; p--)
- ;
- std::string pair(p);
-- std::vector<w_char> pair_u;
-+ wide::string pair_u;
- u8_u16(pair_u, pair);
- unsigned short a = pair_u.size() > 1 ? ((pair_u[1].h << 8) + pair_u[1].l) : 0;
- unsigned short b = !pair_u.empty() ? ((pair_u[0].h << 8) + pair_u[0].l) : 0;
-@@ -1523,7 +1523,7 @@ short AffixMgr::get_syllable(const std::string& word) {
- }
- }
- } else if (!cpdvowels_utf16.empty()) {
-- std::vector<w_char> w;
-+ wide::string w;
- u8_u16(w, word);
- for (size_t i = 0; i < w.size(); ++i) {
- if (std::binary_search(cpdvowels_utf16.begin(),
-@@ -3505,7 +3505,7 @@ const char* AffixMgr::get_ignore() const {
- }
-
- // return the preferred ignore string for suggestions
--const std::vector<w_char>& AffixMgr::get_ignore_utf16() const {
-+const wide::string& AffixMgr::get_ignore_utf16() const {
- return ignorechars_utf16;
- }
-
-@@ -3528,7 +3528,7 @@ const std::string& AffixMgr::get_wordchars() const {
- return wordchars;
- }
-
--const std::vector<w_char>& AffixMgr::get_wordchars_utf16() const {
-+const wide::string& AffixMgr::get_wordchars_utf16() const {
- return wordchars_utf16;
- }
-
-diff --git a/src/hunspell/affixmgr.hxx b/src/hunspell/affixmgr.hxx
-index 83a4b42..11f1a67 100644
---- a/src/hunspell/affixmgr.hxx
-+++ b/src/hunspell/affixmgr.hxx
-@@ -146,7 +146,7 @@ class AffixMgr {
- int cpdwordmax;
- int cpdmaxsyllable;
- std::string cpdvowels; // vowels (for calculating of Hungarian compounding limit,
-- std::vector<w_char> cpdvowels_utf16; //vowels for UTF-8 encoding
-+ wide::string cpdvowels_utf16; //vowels for UTF-8 encoding
- std::string cpdsyllablenum; // syllable count incrementing flag
- const char* pfxappnd; // BUG: not stateless
- const char* sfxappnd; // BUG: not stateless
-@@ -157,9 +157,9 @@ class AffixMgr {
- PfxEntry* pfx; // BUG: not stateless
- int checknum;
- std::string wordchars; // letters + spec. word characters
-- std::vector<w_char> wordchars_utf16;
-+ wide::string wordchars_utf16;
- std::string ignorechars; // letters + spec. word characters
-- std::vector<w_char> ignorechars_utf16;
-+ wide::string ignorechars_utf16;
- std::string version; // affix and dictionary file version string
- std::string lang; // language
- int langnum;
-@@ -306,9 +306,9 @@ class AffixMgr {
- char* get_key_string();
- char* get_try_string() const;
- const std::string& get_wordchars() const;
-- const std::vector<w_char>& get_wordchars_utf16() const;
-+ const wide::string& get_wordchars_utf16() const;
- const char* get_ignore() const;
-- const std::vector<w_char>& get_ignore_utf16() const;
-+ const wide::string& get_ignore_utf16() const;
- int get_compound() const;
- FLAG get_compoundflag() const;
- FLAG get_forbiddenword() const;
-diff --git a/src/hunspell/csutil.cxx b/src/hunspell/csutil.cxx
-index c1666a5..2f59b3d 100644
---- a/src/hunspell/csutil.cxx
-+++ b/src/hunspell/csutil.cxx
-@@ -143,10 +143,10 @@ void myopen(std::ifstream& stream, const char* path, std::ios_base::openmode mod
- stream.open(path, mode);
- }
-
--std::string& u16_u8(std::string& dest, const std::vector<w_char>& src) {
-+std::string& u16_u8(std::string& dest, const wide::string& src) {
- dest.clear();
-- std::vector<w_char>::const_iterator u2 = src.begin();
-- std::vector<w_char>::const_iterator u2_max = src.end();
-+ wide::string::const_iterator u2 = src.begin();
-+ wide::string::const_iterator u2_max = src.end();
- while (u2 < u2_max) {
- signed char u8;
- if (u2->h) { // > 0xFF
-@@ -180,7 +180,7 @@ std::string& u16_u8(std::string& dest, const std::vector<w_char>& src) {
- return dest;
- }
-
--int u8_u16(std::vector<w_char>& dest, const std::string& src) {
-+int u8_u16(wide::string& dest, const std::string& src) {
- dest.clear();
- std::string::const_iterator u8 = src.begin();
- std::string::const_iterator u8_max = src.end();
-@@ -474,7 +474,7 @@ size_t reverseword(std::string& word) {
-
- // reverse word
- size_t reverseword_utf(std::string& word) {
-- std::vector<w_char> w;
-+ wide::string w;
- u8_u16(w, word);
- std::reverse(w.begin(), w.end());
- u16_u8(word, w);
-@@ -552,7 +552,7 @@ std::string& mkallsmall(std::string& s, const struct cs_info* csconv) {
- return s;
- }
-
--std::vector<w_char>& mkallsmall_utf(std::vector<w_char>& u,
-+wide::string& mkallsmall_utf(wide::string& u,
- int langnum) {
- for (size_t i = 0; i < u.size(); ++i) {
- unsigned short idx = (u[i].h << 8) + u[i].l;
-@@ -565,7 +565,7 @@ std::vector<w_char>& mkallsmall_utf(std::vector<w_char>& u,
- return u;
- }
-
--std::vector<w_char>& mkallcap_utf(std::vector<w_char>& u, int langnum) {
-+wide::string& mkallcap_utf(wide::string& u, int langnum) {
- for (size_t i = 0; i < u.size(); i++) {
- unsigned short idx = (u[i].h << 8) + u[i].l;
- unsigned short upridx = unicodetoupper(idx, langnum);
-@@ -584,7 +584,7 @@ std::string& mkinitcap(std::string& s, const struct cs_info* csconv) {
- return s;
- }
-
--std::vector<w_char>& mkinitcap_utf(std::vector<w_char>& u, int langnum) {
-+wide::string& mkinitcap_utf(wide::string& u, int langnum) {
- if (!u.empty()) {
- unsigned short idx = (u[0].h << 8) + u[0].l;
- unsigned short upridx = unicodetoupper(idx, langnum);
-@@ -603,7 +603,7 @@ std::string& mkinitsmall(std::string& s, const struct cs_info* csconv) {
- return s;
- }
-
--std::vector<w_char>& mkinitsmall_utf(std::vector<w_char>& u, int langnum) {
-+wide::string& mkinitsmall_utf(wide::string& u, int langnum) {
- if (!u.empty()) {
- unsigned short idx = (u[0].h << 8) + u[0].l;
- unsigned short lwridx = unicodetolower(idx, langnum);
-@@ -2532,7 +2532,7 @@ int get_captype(const std::string& word, cs_info* csconv) {
- return HUHCAP;
- }
-
--int get_captype_utf8(const std::vector<w_char>& word, int langnum) {
-+int get_captype_utf8(const wide::string& word, int langnum) {
- // now determine the capitalization type of the first nl letters
- size_t ncap = 0;
- size_t nneutral = 0;
-@@ -2565,9 +2565,9 @@ int get_captype_utf8(const std::vector<w_char>& word, int langnum) {
-
- // strip all ignored characters in the string
- size_t remove_ignored_chars_utf(std::string& word,
-- const std::vector<w_char>& ignored_chars) {
-- std::vector<w_char> w;
-- std::vector<w_char> w2;
-+ const wide::string& ignored_chars) {
-+ wide::string w;
-+ wide::string w2;
- u8_u16(w, word);
-
- for (size_t i = 0; i < w.size(); ++i) {
-@@ -2626,7 +2626,7 @@ bool parse_string(const std::string& line, std::string& out, int ln) {
-
- bool parse_array(const std::string& line,
- std::string& out,
-- std::vector<w_char>& out_utf16,
-+ wide::string& out_utf16,
- int utf8,
- int ln) {
- if (!parse_string(line, out, ln))
-diff --git a/src/hunspell/csutil.hxx b/src/hunspell/csutil.hxx
-index 302d7e9..313672e 100644
---- a/src/hunspell/csutil.hxx
-+++ b/src/hunspell/csutil.hxx
-@@ -134,10 +134,10 @@ LIBHUNSPELL_DLL_EXPORTED void myopen(std::ifstream& stream, const char* path,
-
- // convert UTF-16 characters to UTF-8
- LIBHUNSPELL_DLL_EXPORTED std::string& u16_u8(std::string& dest,
-- const std::vector<w_char>& src);
-+ const wide::string& src);
-
- // convert UTF-8 characters to UTF-16
--LIBHUNSPELL_DLL_EXPORTED int u8_u16(std::vector<w_char>& dest,
-+LIBHUNSPELL_DLL_EXPORTED int u8_u16(wide::string& dest,
- const std::string& src);
-
- // remove end of line char(s)
-@@ -219,31 +219,31 @@ LIBHUNSPELL_DLL_EXPORTED std::string& mkinitcap(std::string& s,
- const struct cs_info* csconv);
-
- // convert first letter of UTF-8 string to capital
--LIBHUNSPELL_DLL_EXPORTED std::vector<w_char>&
--mkinitcap_utf(std::vector<w_char>& u, int langnum);
-+LIBHUNSPELL_DLL_EXPORTED wide::string&
-+mkinitcap_utf(wide::string& u, int langnum);
-
- // convert UTF-8 string to little
--LIBHUNSPELL_DLL_EXPORTED std::vector<w_char>&
--mkallsmall_utf(std::vector<w_char>& u, int langnum);
-+LIBHUNSPELL_DLL_EXPORTED wide::string&
-+mkallsmall_utf(wide::string& u, int langnum);
-
- // convert first letter of UTF-8 string to little
--LIBHUNSPELL_DLL_EXPORTED std::vector<w_char>&
--mkinitsmall_utf(std::vector<w_char>& u, int langnum);
-+LIBHUNSPELL_DLL_EXPORTED wide::string&
-+mkinitsmall_utf(wide::string& u, int langnum);
-
- // convert UTF-8 string to capital
--LIBHUNSPELL_DLL_EXPORTED std::vector<w_char>&
--mkallcap_utf(std::vector<w_char>& u, int langnum);
-+LIBHUNSPELL_DLL_EXPORTED wide::string&
-+mkallcap_utf(wide::string& u, int langnum);
-
- // get type of capitalization
- LIBHUNSPELL_DLL_EXPORTED int get_captype(const std::string& q, cs_info*);
-
- // get type of capitalization (UTF-8)
--LIBHUNSPELL_DLL_EXPORTED int get_captype_utf8(const std::vector<w_char>& q, int langnum);
-+LIBHUNSPELL_DLL_EXPORTED int get_captype_utf8(const wide::string& q, int langnum);
-
- // strip all ignored characters in the string
- LIBHUNSPELL_DLL_EXPORTED size_t remove_ignored_chars_utf(
- std::string& word,
-- const std::vector<w_char>& ignored_chars);
-+ const wide::string& ignored_chars);
-
- // strip all ignored characters in the string
- LIBHUNSPELL_DLL_EXPORTED size_t remove_ignored_chars(
-@@ -256,7 +256,7 @@ LIBHUNSPELL_DLL_EXPORTED bool parse_string(const std::string& line,
-
- LIBHUNSPELL_DLL_EXPORTED bool parse_array(const std::string& line,
- std::string& out,
-- std::vector<w_char>& out_utf16,
-+ wide::string& out_utf16,
- int utf8,
- int ln);
-
-diff --git a/src/hunspell/hashmgr.cxx b/src/hunspell/hashmgr.cxx
-index 1de1690..6d92e9b 100644
---- a/src/hunspell/hashmgr.cxx
-+++ b/src/hunspell/hashmgr.cxx
-@@ -345,7 +345,7 @@ int HashMgr::add_hidden_capitalized_word(const std::string& word,
- flags2[flagslen] = ONLYUPCASEFLAG;
- if (utf8) {
- std::string st;
-- std::vector<w_char> w;
-+ wide::string w;
- u8_u16(w, word);
- mkallsmall_utf(w, langnum);
- mkinitcap_utf(w, langnum);
-@@ -366,7 +366,7 @@ int HashMgr::add_hidden_capitalized_word(const std::string& word,
- int HashMgr::get_clen_and_captype(const std::string& word, int* captype) {
- int len;
- if (utf8) {
-- std::vector<w_char> dest_utf;
-+ wide::string dest_utf;
- len = u8_u16(dest_utf, word);
- *captype = get_captype_utf8(dest_utf, langnum);
- } else {
-@@ -688,7 +688,7 @@ int HashMgr::decode_flags(unsigned short** result, const std::string& flags, Fil
- break;
- }
- case FLAG_UNI: { // UTF-8 characters
-- std::vector<w_char> w;
-+ wide::string w;
- u8_u16(w, flags);
- len = w.size();
- *result = (unsigned short*)malloc(len * sizeof(unsigned short));
-@@ -760,7 +760,7 @@ bool HashMgr::decode_flags(std::vector<unsigned short>& result, const std::strin
- break;
- }
- case FLAG_UNI: { // UTF-8 characters
-- std::vector<w_char> w;
-+ wide::string w;
- u8_u16(w, flags);
- size_t len = w.size();
- size_t origsize = result.size();
-@@ -793,7 +793,7 @@ unsigned short HashMgr::decode_flag(const char* f) const {
- s = (unsigned short)i;
- break;
- case FLAG_UNI: {
-- std::vector<w_char> w;
-+ wide::string w;
- u8_u16(w, f);
- if (!w.empty())
- memcpy(&s, &w[0], 1 * sizeof(short));
-@@ -820,7 +820,7 @@ char* HashMgr::encode_flag(unsigned short f) const {
- ch = stream.str();
- } else if (flag_mode == FLAG_UNI) {
- const w_char* w_c = (const w_char*)&f;
-- std::vector<w_char> w(w_c, w_c + 1);
-+ wide::string w(w_c, w_c + 1);
- u16_u8(ch, w);
- } else {
- ch.push_back((unsigned char)(f));
-diff --git a/src/hunspell/hashmgr.hxx b/src/hunspell/hashmgr.hxx
-index 812171a..312c8ba 100644
---- a/src/hunspell/hashmgr.hxx
-+++ b/src/hunspell/hashmgr.hxx
-@@ -96,7 +96,7 @@ class HashMgr {
- std::string lang;
- struct cs_info* csconv;
- std::string ignorechars;
-- std::vector<w_char> ignorechars_utf16;
-+ wide::string ignorechars_utf16;
- int numaliasf; // flag vector `compression' with aliases
- unsigned short** aliasf;
- unsigned short* aliasflen;
-diff --git a/src/hunspell/hunspell.cxx b/src/hunspell/hunspell.cxx
-index a8d78dc..46f1df9 100644
---- a/src/hunspell/hunspell.cxx
-+++ b/src/hunspell/hunspell.cxx
-@@ -103,7 +103,7 @@ public:
- bool spell(const std::string& word, int* info = NULL, std::string* root = NULL);
- std::vector<std::string> suggest(const std::string& word);
- const std::string& get_wordchars() const;
-- const std::vector<w_char>& get_wordchars_utf16() const;
-+ const wide::string& get_wordchars_utf16() const;
- const std::string& get_dict_encoding() const;
- int add(const std::string& word);
- int add_with_affix(const std::string& word, const std::string& example);
-@@ -127,15 +127,15 @@ private:
- private:
- void cleanword(std::string& dest, const std::string&, int* pcaptype, int* pabbrev);
- size_t cleanword2(std::string& dest,
-- std::vector<w_char>& dest_u,
-+ wide::string& dest_u,
- const std::string& src,
- int* pcaptype,
- size_t* pabbrev);
- void mkinitcap(std::string& u8);
-- int mkinitcap2(std::string& u8, std::vector<w_char>& u16);
-- int mkinitsmall2(std::string& u8, std::vector<w_char>& u16);
-+ int mkinitcap2(std::string& u8, wide::string& u16);
-+ int mkinitsmall2(std::string& u8, wide::string& u16);
- void mkallcap(std::string& u8);
-- int mkallsmall2(std::string& u8, std::vector<w_char>& u16);
-+ int mkallsmall2(std::string& u8, wide::string& u16);
- struct hentry* checkword(const std::string& source, int* info, std::string* root);
- std::string sharps_u8_l1(const std::string& source);
- hentry*
-@@ -231,7 +231,7 @@ int HunspellImpl::add_dic(const char* dpath, const char* key) {
- // return the length of the "cleaned" (and UTF-8 encoded) word
-
- size_t HunspellImpl::cleanword2(std::string& dest,
-- std::vector<w_char>& dest_utf,
-+ wide::string& dest_utf,
- const std::string& src,
- int* pcaptype,
- size_t* pabbrev) {
-@@ -313,7 +313,7 @@ void HunspellImpl::cleanword(std::string& dest,
- // remember to terminate the destination string
- firstcap = csconv[static_cast<unsigned char>(dest[0])].ccase;
- } else {
-- std::vector<w_char> t;
-+ wide::string t;
- u8_u16(t, src);
- for (size_t i = 0; i < t.size(); ++i) {
- unsigned short idx = (t[i].h << 8) + t[i].l;
-@@ -346,7 +346,7 @@ void HunspellImpl::cleanword(std::string& dest,
-
- void HunspellImpl::mkallcap(std::string& u8) {
- if (utf8) {
-- std::vector<w_char> u16;
-+ wide::string u16;
- u8_u16(u16, u8);
- ::mkallcap_utf(u16, langnum);
- u16_u8(u8, u16);
-@@ -355,7 +355,7 @@ void HunspellImpl::mkallcap(std::string& u8) {
- }
- }
-
--int HunspellImpl::mkallsmall2(std::string& u8, std::vector<w_char>& u16) {
-+int HunspellImpl::mkallsmall2(std::string& u8, wide::string& u16) {
- if (utf8) {
- ::mkallsmall_utf(u16, langnum);
- u16_u8(u8, u16);
-@@ -438,7 +438,7 @@ bool HunspellImpl::spell(const std::string& word, int* info, std::string* root)
- size_t wl = 0;
-
- std::string scw;
-- std::vector<w_char> sunicw;
-+ wide::string sunicw;
-
- // input conversion
- RepList* rl = pAMgr ? pAMgr->get_iconvtable() : NULL;
-@@ -519,7 +519,7 @@ bool HunspellImpl::spell(const std::string& word, int* info, std::string* root)
- std::string part1 = scw.substr(0, apos+1);
- std::string part2 = scw.substr(apos+1);
- if (utf8) {
-- std::vector<w_char> part1u, part2u;
-+ wide::string part1u, part2u;
- u8_u16(part1u, part1);
- u8_u16(part2u, part2);
- mkinitcap2(part2, part2u);
-@@ -704,7 +704,7 @@ struct hentry* HunspellImpl::checkword(const std::string& w, int* info, std::str
- if (ignoredchars != NULL) {
- w2.assign(w);
- if (utf8) {
-- const std::vector<w_char>& ignoredchars_utf16 =
-+ const wide::string& ignoredchars_utf16 =
- pAMgr->get_ignore_utf16();
- remove_ignored_chars_utf(w2, ignoredchars_utf16);
- } else {
-@@ -855,7 +855,7 @@ std::vector<std::string> HunspellImpl::suggest(const std::string& word) {
- size_t wl = 0;
-
- std::string scw;
-- std::vector<w_char> sunicw;
-+ wide::string sunicw;
-
- // input conversion
- RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
-@@ -909,7 +909,7 @@ std::vector<std::string> HunspellImpl::suggest(const std::string& word) {
- std::string postdot = scw.substr(dot_pos + 1);
- int captype_;
- if (utf8) {
-- std::vector<w_char> postdotu;
-+ wide::string postdotu;
- u8_u16(postdotu, postdot);
- captype_ = get_captype_utf8(postdotu, langnum);
- } else {
-@@ -951,7 +951,7 @@ std::vector<std::string> HunspellImpl::suggest(const std::string& word) {
- if ((slen < wl) && strcmp(scw.c_str() + wl - slen, space + 1)) {
- std::string first(slst[j].c_str(), space + 1);
- std::string second(space + 1);
-- std::vector<w_char> w;
-+ wide::string w;
- if (utf8)
- u8_u16(w, second);
- mkinitcap2(second, w);
-@@ -1109,7 +1109,7 @@ std::vector<std::string> HunspellImpl::suggest(const std::string& word) {
- for (size_t j = 0; j < slst.size(); ++j) {
- if (slst[j].find(' ') == std::string::npos && !spell(slst[j])) {
- std::string s;
-- std::vector<w_char> w;
-+ wide::string w;
- if (utf8) {
- u8_u16(w, slst[j]);
- } else {
-@@ -1262,17 +1262,17 @@ const std::string& HunspellImpl::get_wordchars() const {
- return pAMgr->get_wordchars();
- }
-
--const std::vector<w_char>& Hunspell::get_wordchars_utf16() const {
-+const wide::string& Hunspell::get_wordchars_utf16() const {
- return m_Impl->get_wordchars_utf16();
- }
-
--const std::vector<w_char>& HunspellImpl::get_wordchars_utf16() const {
-+const wide::string& HunspellImpl::get_wordchars_utf16() const {
- return pAMgr->get_wordchars_utf16();
- }
-
- void HunspellImpl::mkinitcap(std::string& u8) {
- if (utf8) {
-- std::vector<w_char> u16;
-+ wide::string u16;
- u8_u16(u16, u8);
- ::mkinitcap_utf(u16, langnum);
- u16_u8(u8, u16);
-@@ -1281,7 +1281,7 @@ void HunspellImpl::mkinitcap(std::string& u8) {
- }
- }
-
--int HunspellImpl::mkinitcap2(std::string& u8, std::vector<w_char>& u16) {
-+int HunspellImpl::mkinitcap2(std::string& u8, wide::string& u16) {
- if (utf8) {
- ::mkinitcap_utf(u16, langnum);
- u16_u8(u8, u16);
-@@ -1291,7 +1291,7 @@ int HunspellImpl::mkinitcap2(std::string& u8, std::vector<w_char>& u16) {
- return u8.size();
- }
-
--int HunspellImpl::mkinitsmall2(std::string& u8, std::vector<w_char>& u16) {
-+int HunspellImpl::mkinitsmall2(std::string& u8, wide::string& u16) {
- if (utf8) {
- ::mkinitsmall_utf(u16, langnum);
- u16_u8(u8, u16);
-@@ -1379,7 +1379,7 @@ std::vector<std::string> HunspellImpl::analyze(const std::string& word) {
- size_t wl = 0;
-
- std::string scw;
-- std::vector<w_char> sunicw;
-+ wide::string sunicw;
-
- // input conversion
- RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
-@@ -1994,7 +1994,7 @@ std::vector<std::string> HunspellImpl::suffix_suggest(const std::string& root_wo
- if (ignoredchars != NULL) {
- w2.assign(root_word);
- if (utf8) {
-- const std::vector<w_char>& ignoredchars_utf16 =
-+ const wide::string& ignoredchars_utf16 =
- pAMgr->get_ignore_utf16();
- remove_ignored_chars_utf(w2, ignoredchars_utf16);
- } else {
-diff --git a/src/hunspell/hunspell.hxx b/src/hunspell/hunspell.hxx
-index 43af66b..375a7da 100644
---- a/src/hunspell/hunspell.hxx
-+++ b/src/hunspell/hunspell.hxx
-@@ -215,7 +215,7 @@ class LIBHUNSPELL_DLL_EXPORTED Hunspell {
- /* get extra word characters definied in affix file for tokenization */
- const char* get_wordchars() const;
- const std::string& get_wordchars_cpp() const;
-- const std::vector<w_char>& get_wordchars_utf16() const;
-+ const wide::string& get_wordchars_utf16() const;
-
- struct cs_info* get_csconv();
-
-diff --git a/src/hunspell/suggestmgr.cxx b/src/hunspell/suggestmgr.cxx
-index 54a474f..1deec96 100644
---- a/src/hunspell/suggestmgr.cxx
-+++ b/src/hunspell/suggestmgr.cxx
-@@ -179,7 +179,7 @@ void SuggestMgr::suggest(std::vector<std::string>& slst,
- const char* w,
- int* onlycompoundsug) {
- int nocompoundtwowords = 0;
-- std::vector<w_char> word_utf;
-+ wide::string word_utf;
- int wl = 0;
- size_t nsugorig = slst.size();
- std::string w2;
-@@ -313,7 +313,7 @@ void SuggestMgr::capchars_utf(std::vector<std::string>& wlst,
- const w_char* word,
- int wl,
- int cpdsuggest) {
-- std::vector<w_char> candidate_utf(word, word + wl);
-+ wide::string candidate_utf(word, word + wl);
- mkallcap_utf(candidate_utf, langnum);
- std::string candidate;
- u16_u8(candidate, candidate_utf);
-@@ -491,7 +491,7 @@ int SuggestMgr::doubletwochars_utf(std::vector<std::string>& wlst,
- if (word[i] == word[i - 2]) {
- state++;
- if (state == 3) {
-- std::vector<w_char> candidate_utf(word, word + i - 1);
-+ wide::string candidate_utf(word, word + i - 1);
- candidate_utf.insert(candidate_utf.end(), word + i + 1, word + wl);
- std::string candidate;
- u16_u8(candidate, candidate_utf);
-@@ -549,7 +549,7 @@ int SuggestMgr::badcharkey_utf(std::vector<std::string>& wlst,
- int wl,
- int cpdsuggest) {
- std::string candidate;
-- std::vector<w_char> candidate_utf(word, word + wl);
-+ wide::string candidate_utf(word, word + wl);
- // swap out each char one by one and try all the tryme
- // chars in its place to see if that makes a good word
- for (int i = 0; i < wl; i++) {
-@@ -614,7 +614,7 @@ int SuggestMgr::badchar_utf(std::vector<std::string>& wlst,
- const w_char* word,
- int wl,
- int cpdsuggest) {
-- std::vector<w_char> candidate_utf(word, word + wl);
-+ wide::string candidate_utf(word, word + wl);
- std::string candidate;
- clock_t timelimit = clock();
- int timer = MINTIMER;
-@@ -641,7 +641,7 @@ int SuggestMgr::extrachar_utf(std::vector<std::string>& wlst,
- const w_char* word,
- int wl,
- int cpdsuggest) {
-- std::vector<w_char> candidate_utf(word, word + wl);
-+ wide::string candidate_utf(word, word + wl);
- if (candidate_utf.size() < 2)
- return wlst.size();
- // try omitting one char of word at a time
-@@ -703,7 +703,7 @@ int SuggestMgr::forgotchar_utf(std::vector<std::string>& wlst,
- const w_char* word,
- int wl,
- int cpdsuggest) {
-- std::vector<w_char> candidate_utf(word, word + wl);
-+ wide::string candidate_utf(word, word + wl);
- clock_t timelimit = clock();
- int timer = MINTIMER;
-
-@@ -852,7 +852,7 @@ int SuggestMgr::swapchar_utf(std::vector<std::string>& wlst,
- const w_char* word,
- int wl,
- int cpdsuggest) {
-- std::vector<w_char> candidate_utf(word, word + wl);
-+ wide::string candidate_utf(word, word + wl);
- if (candidate_utf.size() < 2)
- return wlst.size();
-
-@@ -909,10 +909,10 @@ int SuggestMgr::longswapchar_utf(std::vector<std::string>& wlst,
- const w_char* word,
- int wl,
- int cpdsuggest) {
-- std::vector<w_char> candidate_utf(word, word + wl);
-+ wide::string candidate_utf(word, word + wl);
- // try swapping not adjacent chars
-- for (std::vector<w_char>::iterator p = candidate_utf.begin(); p < candidate_utf.end(); ++p) {
-- for (std::vector<w_char>::iterator q = candidate_utf.begin(); q < candidate_utf.end(); ++q) {
-+ for (wide::string::iterator p = candidate_utf.begin(); p < candidate_utf.end(); ++p) {
-+ for (wide::string::iterator q = candidate_utf.begin(); q < candidate_utf.end(); ++q) {
- if (std::abs(std::distance(q, p)) > 1) {
- std::swap(*p, *q);
- std::string candidate;
-@@ -962,13 +962,13 @@ int SuggestMgr::movechar_utf(std::vector<std::string>& wlst,
- const w_char* word,
- int wl,
- int cpdsuggest) {
-- std::vector<w_char> candidate_utf(word, word + wl);
-+ wide::string candidate_utf(word, word + wl);
- if (candidate_utf.size() < 2)
- return wlst.size();
-
- // try moving a char
-- for (std::vector<w_char>::iterator p = candidate_utf.begin(); p < candidate_utf.end(); ++p) {
-- for (std::vector<w_char>::iterator q = p + 1; q < candidate_utf.end() && std::distance(p, q) < 10; ++q) {
-+ for (wide::string::iterator p = candidate_utf.begin(); p < candidate_utf.end(); ++p) {
-+ for (wide::string::iterator q = p + 1; q < candidate_utf.end() && std::distance(p, q) < 10; ++q) {
- std::swap(*q, *(q - 1));
- if (std::distance(p, q) < 2)
- continue; // omit swap char
-@@ -979,8 +979,8 @@ int SuggestMgr::movechar_utf(std::vector<std::string>& wlst,
- std::copy(word, word + candidate_utf.size(), candidate_utf.begin());
- }
-
-- for (std::vector<w_char>::reverse_iterator p = candidate_utf.rbegin(); p < candidate_utf.rend(); ++p) {
-- for (std::vector<w_char>::reverse_iterator q = p + 1; q < candidate_utf.rend() && std::distance(p, q) < 10; ++q) {
-+ for (wide::string::reverse_iterator p = candidate_utf.rbegin(); p < candidate_utf.rend(); ++p) {
-+ for (wide::string::reverse_iterator q = p + 1; q < candidate_utf.rend() && std::distance(p, q) < 10; ++q) {
- std::swap(*q, *(q - 1));
- if (std::distance(p, q) < 2)
- continue; // omit swap char
-@@ -1032,7 +1032,7 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
- word = w2.c_str();
- }
-
-- std::vector<w_char> u8;
-+ wide::string u8;
- int nc = strlen(word);
- int n = (utf8) ? u8_u16(u8, word) : nc;
-
-@@ -1050,7 +1050,7 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
- phonetable* ph = (pAMgr) ? pAMgr->get_phonetable() : NULL;
- std::string target;
- std::string candidate;
-- std::vector<w_char> w_candidate;
-+ wide::string w_candidate;
- if (ph) {
- if (utf8) {
- u8_u16(w_candidate, word);
-@@ -1069,16 +1069,16 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
- FLAG nongramsuggest = pAMgr ? pAMgr->get_nongramsuggest() : FLAG_NULL;
- FLAG onlyincompound = pAMgr ? pAMgr->get_onlyincompound() : FLAG_NULL;
-
-- std::vector<w_char> w_word, w_target;
-+ wide::string w_word, w_target;
- if (utf8) {
- u8_u16(w_word, word);
- u8_u16(w_target, target);
- }
-
-- std::vector<w_char> w_entry;
-+ wide::string w_entry;
- std::string f;
-- std::vector<w_char> w_f;
-- std::vector<w_char> w_target2;
-+ wide::string w_f;
-+ wide::string w_target2;
-
- for (size_t i = 0; i < rHMgr.size(); ++i) {
- while (0 != (hp = rHMgr[i]->walk_hashtable(col, hp))) {
-@@ -1168,7 +1168,7 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
- // find minimum threshold for a passable suggestion
- // mangle original word three differnt ways
- // and score them to generate a minimum acceptable score
-- std::vector<w_char> w_mw;
-+ wide::string w_mw;
- int thresh = 0;
- for (int sp = 1; sp < 4; sp++) {
- if (utf8) {
-@@ -1210,7 +1210,7 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
- return;
- }
-
-- std::vector<w_char> w_glst_word;
-+ wide::string w_glst_word;
- for (int i = 0; i < MAX_ROOTS; i++) {
- if (roots[i]) {
- struct hentry* rp = roots[i];
-@@ -1288,7 +1288,7 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
- fact = (10.0 - maxd) / 5.0;
- }
-
-- std::vector<w_char> w_gl;
-+ wide::string w_gl;
- for (int i = 0; i < MAX_GUESS; i++) {
- if (guess[i]) {
- // lowering guess[i]
-@@ -1789,8 +1789,8 @@ std::string SuggestMgr::suggest_gen(const std::vector<std::string>& desc, const
-
- // generate an n-gram score comparing s1 and s2, UTF16 version
- int SuggestMgr::ngram(int n,
-- const std::vector<w_char>& su1,
-- const std::vector<w_char>& su2,
-+ const wide::string& su1,
-+ const wide::string& su2,
- int opt) {
- int nscore = 0;
- int ns;
-@@ -1803,8 +1803,8 @@ int SuggestMgr::ngram(int n,
- if (l2 == 0)
- return 0;
- // lowering dictionary word
-- const std::vector<w_char>* p_su2 = &su2;
-- std::vector<w_char> su2_copy;
-+ const wide::string* p_su2 = &su2;
-+ wide::string su2_copy;
- if (opt & NGRAM_LOWERING) {
- su2_copy = su2;
- mkallsmall_utf(su2_copy, langnum);
-@@ -1894,8 +1894,8 @@ int SuggestMgr::ngram(int n,
-
- // length of the left common substring of s1 and (decapitalised) s2, UTF version
- int SuggestMgr::leftcommonsubstring(
-- const std::vector<w_char>& su1,
-- const std::vector<w_char>& su2) {
-+ const wide::string& su1,
-+ const wide::string& su2) {
- int l1 = su1.size();
- int l2 = su2.size();
- // decapitalize dictionary word
-@@ -1948,8 +1948,8 @@ int SuggestMgr::commoncharacterpositions(const char* s1,
- int diffpos[2];
- *is_swap = 0;
- if (utf8) {
-- std::vector<w_char> su1;
-- std::vector<w_char> su2;
-+ wide::string su1;
-+ wide::string su2;
- int l1 = u8_u16(su1, s1);
- int l2 = u8_u16(su2, s2);
-
-@@ -2004,7 +2004,7 @@ int SuggestMgr::commoncharacterpositions(const char* s1,
-
- int SuggestMgr::mystrlen(const char* word) {
- if (utf8) {
-- std::vector<w_char> w;
-+ wide::string w;
- return u8_u16(w, word);
- } else
- return strlen(word);
-@@ -2044,8 +2044,8 @@ void SuggestMgr::lcs(const char* s,
- int* l2,
- char** result) {
- int n, m;
-- std::vector<w_char> su;
-- std::vector<w_char> su2;
-+ wide::string su;
-+ wide::string su2;
- char* b;
- char* c;
- int i;
-diff --git a/src/hunspell/suggestmgr.hxx b/src/hunspell/suggestmgr.hxx
-index 6ba9dc8..9bfa933 100644
---- a/src/hunspell/suggestmgr.hxx
-+++ b/src/hunspell/suggestmgr.hxx
-@@ -107,11 +107,11 @@ class SuggestMgr {
- private:
- char* ckey;
- size_t ckeyl;
-- std::vector<w_char> ckey_utf;
-+ wide::string ckey_utf;
-
- char* ctry;
- size_t ctryl;
-- std::vector<w_char> ctry_utf;
-+ wide::string ctry_utf;
-
- AffixMgr* pAMgr;
- unsigned int maxSug;
-@@ -173,12 +173,12 @@ class SuggestMgr {
- const std::vector<mapentry>&,
- int*,
- clock_t*);
-- int ngram(int n, const std::vector<w_char>& su1,
-- const std::vector<w_char>& su2, int opt);
-+ int ngram(int n, const wide::string& su1,
-+ const wide::string& su2, int opt);
- int ngram(int n, const std::string& s1, const std::string& s2, int opt);
- int mystrlen(const char* word);
-- int leftcommonsubstring(const std::vector<w_char>& su1,
-- const std::vector<w_char>& su2);
-+ int leftcommonsubstring(const wide::string& su1,
-+ const wide::string& su2);
- int leftcommonsubstring(const char* s1, const char* s2);
- int commoncharacterpositions(const char* s1, const char* s2, int* is_swap);
- void bubblesort(char** rwd, char** rwd2, int* rsc, int n);
-diff --git a/src/hunspell/w_char.hxx b/src/hunspell/w_char.hxx
-index c561ffc..84ae13c 100644
---- a/src/hunspell/w_char.hxx
-+++ b/src/hunspell/w_char.hxx
-@@ -42,6 +42,7 @@
- #define W_CHAR_HXX_
-
- #include <string>
-+#include <vector>
-
- #ifndef GCC
- struct w_char {
-@@ -72,4 +73,9 @@ struct replentry {
- std::string outstrings[4]; // med, ini, fin, isol
- };
-
-+namespace wide
-+{
-+ typedef std::vector<w_char> string;
-+}
-+
- #endif
-diff --git a/src/parsers/textparser.cxx b/src/parsers/textparser.cxx
-index 53548e4..8e43f79 100644
---- a/src/parsers/textparser.cxx
-+++ b/src/parsers/textparser.cxx
-@@ -81,7 +81,7 @@ int TextParser::is_wordchar(const char* w) {
- if (*w == '\0')
- return 0;
- if (utf8) {
-- std::vector<w_char> wc;
-+ wide::string wc;
- unsigned short idx;
- u8_u16(wc, w);
- if (wc.empty())
-diff --git a/src/tools/hunspell.cxx b/src/tools/hunspell.cxx
-index 3172409..c39f148 100644
---- a/src/tools/hunspell.cxx
-+++ b/src/tools/hunspell.cxx
-@@ -199,7 +199,7 @@ enum { FMT_TEXT, FMT_LATEX, FMT_HTML, FMT_MAN, FMT_FIRST, FMT_XML, FMT_ODF };
- std::string wordchars;
- char* dicpath = NULL;
- const w_char* wordchars_utf16 = NULL;
--std::vector<w_char> new_wordchars_utf16;
-+wide::string new_wordchars_utf16;
- int wordchars_utf16_len;
- char* dicname = NULL;
- char* privdicname = NULL;
-@@ -311,7 +311,7 @@ TextParser* get_parser(int format, const char* extension, Hunspell* pMS) {
- }
-
- if (io_utf8) {
-- const std::vector<w_char>& vec_wordchars_utf16 = pMS->get_wordchars_utf16();
-+ const wide::string& vec_wordchars_utf16 = pMS->get_wordchars_utf16();
- const std::string& vec_wordchars = pMS->get_wordchars_cpp();
- wordchars_utf16_len = vec_wordchars_utf16.size();
- wordchars_utf16 = wordchars_utf16_len ? &vec_wordchars_utf16[0] : NULL;
-@@ -356,7 +356,7 @@ TextParser* get_parser(int format, const char* extension, Hunspell* pMS) {
- ch[1] = '\0';
- size_t res = iconv(conv, (ICONV_CONST char**)&ch8bit, &c1, &dest, &c2);
- if (res != (size_t)-1) {
-- std::vector<w_char> w;
-+ wide::string w;
- u8_u16(w, std::string(u8, dest));
- unsigned short idx = w.empty() ? 0 : (w[0].h << 8) + w[0].l;
- if (unicodeisalpha(idx)) {
-@@ -395,7 +395,7 @@ TextParser* get_parser(int format, const char* extension, Hunspell* pMS) {
- }
- #else
- if (strcmp(denc, "UTF-8") == 0) {
-- const std::vector<w_char>& vec_wordchars_utf16 = pMS->get_wordchars_utf16();
-+ const wide::string& vec_wordchars_utf16 = pMS->get_wordchars_utf16();
- wordchars_utf16 = &vec_wordchars_utf16[0];
- wordchars_utf16_len = vec_wordchars_utf16.size();
- io_utf8 = 1;
-@@ -1199,7 +1199,7 @@ void dialogscreen(TextParser* parser,
- std::string lower_first_char(const std::string& token, const char* ioenc, int langnum) {
- std::string utf8str(token);
- chenc(utf8str, ioenc, "UTF-8");
-- std::vector<w_char> u;
-+ wide::string u;
- u8_u16(u, utf8str);
- if (!u.empty()) {
- unsigned short idx = (u[0].h << 8) + u[0].l;
---
-2.9.3
-
diff --git a/external/hunspell/0003-Related-hunspell-406-use-a-basic_string-w_char-inste.patch b/external/hunspell/0003-Related-hunspell-406-use-a-basic_string-w_char-inste.patch
deleted file mode 100644
index ba48f9f..0000000
--- a/external/hunspell/0003-Related-hunspell-406-use-a-basic_string-w_char-inste.patch
+++ /dev/null
@@ -1,37 +0,0 @@
-From 7c7f56e1c6fe510a2c5e826cc49aeae3f6614f86 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm at redhat.com>
-Date: Mon, 23 Jan 2017 13:36:20 +0000
-Subject: [PATCH 3/3] Related: hunspell#406 use a basic_string<w_char> instead
- of vector
-
-kcachegrind reports 1,066,887,723 -> 894,015,631 on
-
-echo Hollo | valgrind --tool=callgrind ./src/tools/.libs/hunspell -d nl_NL
----
- src/hunspell/w_char.hxx | 3 +--
- 1 file changed, 1 insertion(+), 2 deletions(-)
-
-diff --git a/src/hunspell/w_char.hxx b/src/hunspell/w_char.hxx
-index 84ae13c..e112b5c 100644
---- a/src/hunspell/w_char.hxx
-+++ b/src/hunspell/w_char.hxx
-@@ -42,7 +42,6 @@
- #define W_CHAR_HXX_
-
- #include <string>
--#include <vector>
-
- #ifndef GCC
- struct w_char {
-@@ -75,7 +74,7 @@ struct replentry {
-
- namespace wide
- {
-- typedef std::vector<w_char> string;
-+ typedef std::basic_string<w_char> string;
- }
-
- #endif
---
-2.9.3
-
diff --git a/external/hunspell/0003-hoist-string-lowering-from-ngram-to-ngsuggest.patch b/external/hunspell/0003-hoist-string-lowering-from-ngram-to-ngsuggest.patch
new file mode 100644
index 0000000..ff2530c
--- /dev/null
+++ b/external/hunspell/0003-hoist-string-lowering-from-ngram-to-ngsuggest.patch
@@ -0,0 +1,264 @@
+From cf0967951a25a2daa10a636092193af5c5497aa2 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm at redhat.com>
+Date: Fri, 10 Feb 2017 16:36:27 +0000
+Subject: [PATCH 3/4] hoist string lowering from ngram to ngsuggest
+
+only lower when we have to and reuse scratch buffers as
+tolower destination
+
+kcachegrind reports 830,529,143 -> 779,887,690 on
+
+echo Hollo | valgrind --tool=callgrind ./src/tools/.libs/hunspell -d nl_NL
+---
+ src/hunspell/suggestmgr.cxx | 143 +++++++++++++++++++++++++++++---------------
+ 1 file changed, 95 insertions(+), 48 deletions(-)
+
+diff --git a/src/hunspell/suggestmgr.cxx b/src/hunspell/suggestmgr.cxx
+index 54a474f..ea52707 100644
+--- a/src/hunspell/suggestmgr.cxx
++++ b/src/hunspell/suggestmgr.cxx
+@@ -1075,10 +1075,8 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
+ u8_u16(w_target, target);
+ }
+
+- std::vector<w_char> w_entry;
+ std::string f;
+ std::vector<w_char> w_f;
+- std::vector<w_char> w_target2;
+
+ for (size_t i = 0; i < rHMgr.size(); ++i) {
+ while (0 != (hp = rHMgr[i]->walk_hashtable(col, hp))) {
+@@ -1091,13 +1089,24 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
+ continue;
+
+ if (utf8) {
+- w_entry.clear();
+- u8_u16(w_entry, HENTRY_WORD(hp));
+- sc = ngram(3, w_word, w_entry, NGRAM_LONGER_WORSE + low) +
+- leftcommonsubstring(w_word, w_entry);
++ w_f.clear();
++ u8_u16(w_f, HENTRY_WORD(hp));
++
++ int leftcommon = leftcommonsubstring(w_word, w_f);
++ if (low) {
++ // lowering dictionary word
++ mkallsmall_utf(w_f, langnum);
++ }
++ sc = ngram(3, w_word, w_f, NGRAM_LONGER_WORSE) + leftcommon;
+ } else {
+- sc = ngram(3, word, HENTRY_WORD(hp), NGRAM_LONGER_WORSE + low) +
+- leftcommonsubstring(word, HENTRY_WORD(hp));
++ f.assign(HENTRY_WORD(hp));
++
++ int leftcommon = leftcommonsubstring(word, f.c_str());
++ if (low) {
++ // lowering dictionary word
++ mkallsmall(f, csconv);
++ }
++ sc = ngram(3, word, f, NGRAM_LONGER_WORSE) + leftcommon;
+ }
+
+ // check special pronounciation
+@@ -1108,11 +1117,20 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
+ if (utf8) {
+ w_f.clear();
+ u8_u16(w_f, f);
+- sc2 = ngram(3, w_word, w_f, NGRAM_LONGER_WORSE + low) +
+- leftcommonsubstring(w_word, w_f);
++
++ int leftcommon = leftcommonsubstring(w_word, w_f);
++ if (low) {
++ // lowering dictionary word
++ mkallsmall_utf(w_f, langnum);
++ }
++ sc2 = ngram(3, w_word, w_f, NGRAM_LONGER_WORSE) + leftcommon;
+ } else {
+- sc2 = ngram(3, word, f, NGRAM_LONGER_WORSE + low) +
+- leftcommonsubstring(word, f.c_str());
++ int leftcommon = leftcommonsubstring(word, f.c_str());
++ if (low) {
++ // lowering dictionary word
++ mkallsmall(f, csconv);
++ }
++ sc2 = ngram(3, word, f, NGRAM_LONGER_WORSE) + leftcommon;
+ }
+ if (sc2 > sc)
+ sc = sc2;
+@@ -1129,14 +1147,14 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
+ candidate = HENTRY_WORD(hp);
+ mkallcap(candidate, csconv);
+ }
+- std::string target2 = phonet(candidate, *ph);
+- w_target2.clear();
++ f = phonet(candidate, *ph);
++ w_f.clear();
+ if (utf8) {
+- u8_u16(w_target2, target2);
+- scphon = 2 * ngram(3, w_target, w_target2,
++ u8_u16(w_f, f);
++ scphon = 2 * ngram(3, w_target, w_f,
+ NGRAM_LONGER_WORSE);
+ } else {
+- scphon = 2 * ngram(3, target, target2,
++ scphon = 2 * ngram(3, target, f,
+ NGRAM_LONGER_WORSE);
+ }
+ }
+@@ -1177,12 +1195,24 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
+ w_mw[k].l = '*';
+ w_mw[k].h = 0;
+ }
+- thresh += ngram(n, w_word, w_mw, NGRAM_ANY_MISMATCH + low);
++
++ if (low) {
++ // lowering dictionary word
++ mkallsmall_utf(w_mw, langnum);
++ }
++
++ thresh += ngram(n, w_word, w_mw, NGRAM_ANY_MISMATCH);
+ } else {
+ std::string mw = word;
+ for (int k = sp; k < n; k += 4)
+ mw[k] = '*';
+- thresh += ngram(n, word, mw, NGRAM_ANY_MISMATCH + low);
++
++ if (low) {
++ // lowering dictionary word
++ mkallsmall(mw, csconv);
++ }
++
++ thresh += ngram(n, word, mw, NGRAM_ANY_MISMATCH);
+ }
+ }
+ thresh = thresh / 3;
+@@ -1210,7 +1240,6 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
+ return;
+ }
+
+- std::vector<w_char> w_glst_word;
+ for (int i = 0; i < MAX_ROOTS; i++) {
+ if (roots[i]) {
+ struct hentry* rp = roots[i];
+@@ -1225,15 +1254,26 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
+
+ for (int k = 0; k < nw; k++) {
+ if (utf8) {
+- w_glst_word.clear();
+- u8_u16(w_glst_word, glst[k].word);
+- sc = ngram(n, w_word, w_glst_word,
+- NGRAM_ANY_MISMATCH + low) +
+- leftcommonsubstring(w_word, w_glst_word);
++ w_f.clear();
++ u8_u16(w_f, glst[k].word);
++
++ int leftcommon = leftcommonsubstring(w_word, w_f);
++ if (low) {
++ // lowering dictionary word
++ mkallsmall_utf(w_f, langnum);
++ }
++
++ sc = ngram(n, w_word, w_f, NGRAM_ANY_MISMATCH) + leftcommon;
+ } else {
+- sc = ngram(n, word, glst[k].word,
+- NGRAM_ANY_MISMATCH + low) +
+- leftcommonsubstring(word, glst[k].word);
++ f = glst[k].word;
++
++ int leftcommon = leftcommonsubstring(word, f.c_str());
++ if (low) {
++ // lowering dictionary word
++ mkallsmall(f, csconv);
++ }
++
++ sc = ngram(n, word, f, NGRAM_ANY_MISMATCH) + leftcommon;
+ }
+
+ if (sc > thresh) {
+@@ -1318,19 +1358,37 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
+ w_gl.clear();
+ if (utf8) {
+ u8_u16(w_gl, gl);
+- re = ngram(2, w_word, w_gl, NGRAM_ANY_MISMATCH + low + NGRAM_WEIGHTED) +
+- ngram(2, w_gl, w_word, NGRAM_ANY_MISMATCH + low + NGRAM_WEIGHTED);
++ //w_gl is lowercase already at this point
++ re = ngram(2, w_word, w_gl, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED);
++ if (low) {
++ w_f = w_word;
++ // lowering dictionary word
++ mkallsmall_utf(w_f, langnum);
++ re += ngram(2, w_gl, w_f, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED);
++ } else {
++ re += ngram(2, w_gl, w_word, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED);
++ }
+ } else {
+- re = ngram(2, word, gl, NGRAM_ANY_MISMATCH + low + NGRAM_WEIGHTED) +
+- ngram(2, gl, word, NGRAM_ANY_MISMATCH + low + NGRAM_WEIGHTED);
++ //gl is lowercase already at this point
++ re = ngram(2, word, gl, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED);
++ if (low) {
++ f = word;
++ // lowering dictionary word
++ mkallsmall(f, csconv);
++ re += ngram(2, gl, f, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED);
++ } else {
++ re += ngram(2, gl, word, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED);
++ }
+ }
+
+ int ngram_score, leftcommon_score;
+ if (utf8) {
+- ngram_score = ngram(4, w_word, w_gl, NGRAM_ANY_MISMATCH + low);
++ //w_gl is lowercase already at this point
++ ngram_score = ngram(4, w_word, w_gl, NGRAM_ANY_MISMATCH);
+ leftcommon_score = leftcommonsubstring(w_word, w_gl);
+ } else {
+- ngram_score = ngram(4, word, gl, NGRAM_ANY_MISMATCH + low);
++ //gl is lowercase already at this point
++ ngram_score = ngram(4, word, gl, NGRAM_ANY_MISMATCH);
+ leftcommon_score = leftcommonsubstring(word, gl.c_str());
+ }
+ gscore[i] =
+@@ -1802,14 +1860,6 @@ int SuggestMgr::ngram(int n,
+ l2 = su2.size();
+ if (l2 == 0)
+ return 0;
+- // lowering dictionary word
+- const std::vector<w_char>* p_su2 = &su2;
+- std::vector<w_char> su2_copy;
+- if (opt & NGRAM_LOWERING) {
+- su2_copy = su2;
+- mkallsmall_utf(su2_copy, langnum);
+- p_su2 = &su2_copy;
+- }
+ for (int j = 1; j <= n; j++) {
+ ns = 0;
+ for (int i = 0; i <= (l1 - j); i++) {
+@@ -1817,7 +1867,7 @@ int SuggestMgr::ngram(int n,
+ for (int l = 0; l <= (l2 - j); l++) {
+ for (k = 0; k < j; k++) {
+ const w_char& c1 = su1[i + k];
+- const w_char& c2 = (*p_su2)[l + k];
++ const w_char& c2 = su2[l + k];
+ if ((c1.l != c2.l) || (c1.h != c2.h))
+ break;
+ }
+@@ -1862,14 +1912,11 @@ int SuggestMgr::ngram(int n,
+ if (l2 == 0)
+ return 0;
+ l1 = s1.size();
+- std::string t(s2);
+- if (opt & NGRAM_LOWERING)
+- mkallsmall(t, csconv);
+ for (int j = 1; j <= n; j++) {
+ ns = 0;
+ for (int i = 0; i <= (l1 - j); i++) {
+- //t is haystack, s1[i..i+j) is needle
+- if (t.find(s1.c_str()+i, 0, j) != std::string::npos) {
++ //s2 is haystack, s1[i..i+j) is needle
++ if (s2.find(s1.c_str()+i, 0, j) != std::string::npos) {
+ ns++;
+ } else if (opt & NGRAM_WEIGHTED) {
+ ns--;
+--
+2.9.3
+
diff --git a/external/hunspell/0004-either-clear-will-be-called-anyway-before-use-or-its.patch b/external/hunspell/0004-either-clear-will-be-called-anyway-before-use-or-its.patch
new file mode 100644
index 0000000..6c8a108
--- /dev/null
+++ b/external/hunspell/0004-either-clear-will-be-called-anyway-before-use-or-its.patch
@@ -0,0 +1,81 @@
+From aab258adbd9c78931a36b96e58975a08000249a8 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm at redhat.com>
+Date: Fri, 10 Feb 2017 17:14:35 +0000
+Subject: [PATCH 4/4] either clear will be called anyway before use, or it's
+ unused afterwards
+
+---
+ src/hunspell/suggestmgr.cxx | 8 --------
+ 1 file changed, 8 deletions(-)
+
+diff --git a/src/hunspell/suggestmgr.cxx b/src/hunspell/suggestmgr.cxx
+index ea52707..ae34535 100644
+--- a/src/hunspell/suggestmgr.cxx
++++ b/src/hunspell/suggestmgr.cxx
+@@ -1089,7 +1089,6 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
+ continue;
+
+ if (utf8) {
+- w_f.clear();
+ u8_u16(w_f, HENTRY_WORD(hp));
+
+ int leftcommon = leftcommonsubstring(w_word, w_f);
+@@ -1115,7 +1114,6 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
+ copy_field(f, HENTRY_DATA(hp), MORPH_PHON)) {
+ int sc2;
+ if (utf8) {
+- w_f.clear();
+ u8_u16(w_f, f);
+
+ int leftcommon = leftcommonsubstring(w_word, w_f);
+@@ -1139,7 +1137,6 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
+ int scphon = -20000;
+ if (ph && (sc > 2) && (abs(n - (int)hp->clen) <= 3)) {
+ if (utf8) {
+- w_candidate.clear();
+ u8_u16(w_candidate, HENTRY_WORD(hp));
+ mkallcap_utf(w_candidate, langnum);
+ u16_u8(candidate, w_candidate);
+@@ -1148,7 +1145,6 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
+ mkallcap(candidate, csconv);
+ }
+ f = phonet(candidate, *ph);
+- w_f.clear();
+ if (utf8) {
+ u8_u16(w_f, f);
+ scphon = 2 * ngram(3, w_target, w_f,
+@@ -1254,7 +1250,6 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
+
+ for (int k = 0; k < nw; k++) {
+ if (utf8) {
+- w_f.clear();
+ u8_u16(w_f, glst[k].word);
+
+ int leftcommon = leftcommonsubstring(w_word, w_f);
+@@ -1335,7 +1330,6 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
+ std::string gl;
+ int len;
+ if (utf8) {
+- w_gl.clear();
+ len = u8_u16(w_gl, guess[i]);
+ mkallsmall_utf(w_gl, langnum);
+ u16_u8(gl, w_gl);
+@@ -1355,7 +1349,6 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
+ }
+ // using 2-gram instead of 3, and other weightening
+
+- w_gl.clear();
+ if (utf8) {
+ u8_u16(w_gl, gl);
+ //w_gl is lowercase already at this point
+@@ -1421,7 +1414,6 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
+ // lowering rootphon[i]
+ std::string gl;
+ int len;
+- w_gl.clear();
+ if (utf8) {
+ len = u8_u16(w_gl, rootsphon[i]);
+ mkallsmall_utf(w_gl, langnum);
+--
+2.9.3
+
diff --git a/external/hunspell/0004-use-a-per-hashmgr-persistent-wide-string-scratch-buf.patch b/external/hunspell/0004-use-a-per-hashmgr-persistent-wide-string-scratch-buf.patch
deleted file mode 100644
index 1d896c6..0000000
--- a/external/hunspell/0004-use-a-per-hashmgr-persistent-wide-string-scratch-buf.patch
+++ /dev/null
@@ -1,117 +0,0 @@
-From 1393bd64581d6010a65d368e1031641391bdb154 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm at redhat.com>
-Date: Mon, 23 Jan 2017 14:30:13 +0000
-Subject: [PATCH 1/2] use a per-hashmgr persistent wide string scratch buffer
-
-kcachegrind reports 894,015,631 -> 845,183,693
----
- src/hunspell/hashmgr.cxx | 40 ++++++++++++++++++----------------------
- src/hunspell/hashmgr.hxx | 1 +
- 2 files changed, 19 insertions(+), 22 deletions(-)
-
-diff --git a/src/hunspell/hashmgr.cxx b/src/hunspell/hashmgr.cxx
-index 6d92e9b..8d6189b 100644
---- a/src/hunspell/hashmgr.cxx
-+++ b/src/hunspell/hashmgr.cxx
-@@ -345,11 +345,10 @@ int HashMgr::add_hidden_capitalized_word(const std::string& word,
- flags2[flagslen] = ONLYUPCASEFLAG;
- if (utf8) {
- std::string st;
-- wide::string w;
-- u8_u16(w, word);
-- mkallsmall_utf(w, langnum);
-- mkinitcap_utf(w, langnum);
-- u16_u8(st, w);
-+ u8_u16(workbuf, word);
-+ mkallsmall_utf(workbuf, langnum);
-+ mkinitcap_utf(workbuf, langnum);
-+ u16_u8(st, workbuf);
- return add_word(st, wcl, flags2, flagslen + 1, dp, true);
- } else {
- std::string new_word(word);
-@@ -366,9 +365,8 @@ int HashMgr::add_hidden_capitalized_word(const std::string& word,
- int HashMgr::get_clen_and_captype(const std::string& word, int* captype) {
- int len;
- if (utf8) {
-- wide::string dest_utf;
-- len = u8_u16(dest_utf, word);
-- *captype = get_captype_utf8(dest_utf, langnum);
-+ len = u8_u16(workbuf, word);
-+ *captype = get_captype_utf8(workbuf, langnum);
- } else {
- len = word.size();
- *captype = get_captype(word, csconv);
-@@ -688,13 +686,12 @@ int HashMgr::decode_flags(unsigned short** result, const std::string& flags, Fil
- break;
- }
- case FLAG_UNI: { // UTF-8 characters
-- wide::string w;
-- u8_u16(w, flags);
-- len = w.size();
-+ u8_u16(workbuf, flags);
-+ len = workbuf.size();
- *result = (unsigned short*)malloc(len * sizeof(unsigned short));
- if (!*result)
- return -1;
-- memcpy(*result, &w[0], len * sizeof(short));
-+ memcpy(*result, &workbuf[0], len * sizeof(short));
- break;
- }
- default: { // Ispell's one-character flags (erfg -> e r f g)
-@@ -760,12 +757,11 @@ bool HashMgr::decode_flags(std::vector<unsigned short>& result, const std::strin
- break;
- }
- case FLAG_UNI: { // UTF-8 characters
-- wide::string w;
-- u8_u16(w, flags);
-- size_t len = w.size();
-+ u8_u16(workbuf, flags);
-+ size_t len = workbuf.size();
- size_t origsize = result.size();
- result.resize(origsize + len);
-- memcpy(&result[origsize], &w[0], len * sizeof(short));
-+ memcpy(&result[origsize], &workbuf[0], len * sizeof(short));
- break;
- }
- default: { // Ispell's one-character flags (erfg -> e r f g)
-@@ -793,10 +789,9 @@ unsigned short HashMgr::decode_flag(const char* f) const {
- s = (unsigned short)i;
- break;
- case FLAG_UNI: {
-- wide::string w;
-- u8_u16(w, f);
-- if (!w.empty())
-- memcpy(&s, &w[0], 1 * sizeof(short));
-+ u8_u16(workbuf, f);
-+ if (!workbuf.empty())
-+ memcpy(&s, &workbuf[0], 1 * sizeof(short));
- break;
- }
- default:
-@@ -820,8 +815,9 @@ char* HashMgr::encode_flag(unsigned short f) const {
- ch = stream.str();
- } else if (flag_mode == FLAG_UNI) {
- const w_char* w_c = (const w_char*)&f;
-- wide::string w(w_c, w_c + 1);
-- u16_u8(ch, w);
-+ workbuf.clear();
-+ workbuf.push_back(*w_c);
-+ u16_u8(ch, workbuf);
- } else {
- ch.push_back((unsigned char)(f));
- }
-diff --git a/src/hunspell/hashmgr.hxx b/src/hunspell/hashmgr.hxx
-index 312c8ba..78ffb44 100644
---- a/src/hunspell/hashmgr.hxx
-+++ b/src/hunspell/hashmgr.hxx
-@@ -97,6 +97,7 @@ class HashMgr {
- struct cs_info* csconv;
- std::string ignorechars;
- wide::string ignorechars_utf16;
-+ mutable wide::string workbuf;
- int numaliasf; // flag vector `compression' with aliases
- unsigned short** aliasf;
- unsigned short* aliasflen;
---
-2.9.3
-
diff --git a/external/hunspell/0005-use-a-per-hashmgr-persistent-wide-string-scratch-buf.patch b/external/hunspell/0005-use-a-per-hashmgr-persistent-wide-string-scratch-buf.patch
deleted file mode 100644
index a892f74..0000000
--- a/external/hunspell/0005-use-a-per-hashmgr-persistent-wide-string-scratch-buf.patch
+++ /dev/null
@@ -1,168 +0,0 @@
-From 5c7bfa8d36b87a0649f6f88b20624c38a3a5f0ae Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm at redhat.com>
-Date: Mon, 23 Jan 2017 14:43:19 +0000
-Subject: [PATCH 2/2] use a per-hashmgr persistent wide string scratch buffer
-
-kcachegrind reports 845,183,693 -> 812,760,392
----
- src/hunspell/suggestmgr.cxx | 55 ++++++++++++++++++++-------------------------
- src/hunspell/suggestmgr.hxx | 3 +++
- 2 files changed, 27 insertions(+), 31 deletions(-)
-
-diff --git a/src/hunspell/suggestmgr.cxx b/src/hunspell/suggestmgr.cxx
-index 1deec96..f5ea01b 100644
---- a/src/hunspell/suggestmgr.cxx
-+++ b/src/hunspell/suggestmgr.cxx
-@@ -491,10 +491,11 @@ int SuggestMgr::doubletwochars_utf(std::vector<std::string>& wlst,
- if (word[i] == word[i - 2]) {
- state++;
- if (state == 3) {
-- wide::string candidate_utf(word, word + i - 1);
-- candidate_utf.insert(candidate_utf.end(), word + i + 1, word + wl);
-+ workbuf1.clear();
-+ workbuf1.insert(workbuf1.end(), word, word + i - 1);
-+ workbuf1.insert(workbuf1.end(), word + i + 1, word + wl);
- std::string candidate;
-- u16_u8(candidate, candidate_utf);
-+ u16_u8(candidate, workbuf1);
- testsug(wlst, candidate, cpdsuggest, NULL, NULL);
- state = 0;
- }
-@@ -1050,12 +1051,11 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
- phonetable* ph = (pAMgr) ? pAMgr->get_phonetable() : NULL;
- std::string target;
- std::string candidate;
-- wide::string w_candidate;
- if (ph) {
- if (utf8) {
-- u8_u16(w_candidate, word);
-- mkallcap_utf(w_candidate, langnum);
-- u16_u8(candidate, w_candidate);
-+ u8_u16(workbuf1, word);
-+ mkallcap_utf(workbuf1, langnum);
-+ u16_u8(candidate, workbuf1);
- } else {
- candidate.assign(word);
- if (!nonbmp)
-@@ -1121,10 +1121,9 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
- int scphon = -20000;
- if (ph && (sc > 2) && (abs(n - (int)hp->clen) <= 3)) {
- if (utf8) {
-- w_candidate.clear();
-- u8_u16(w_candidate, HENTRY_WORD(hp));
-- mkallcap_utf(w_candidate, langnum);
-- u16_u8(candidate, w_candidate);
-+ u8_u16(workbuf1, HENTRY_WORD(hp));
-+ mkallcap_utf(workbuf1, langnum);
-+ u16_u8(candidate, workbuf1);
- } else {
- candidate = HENTRY_WORD(hp);
- mkallcap(candidate, csconv);
-@@ -1804,11 +1803,10 @@ int SuggestMgr::ngram(int n,
- return 0;
- // lowering dictionary word
- const wide::string* p_su2 = &su2;
-- wide::string su2_copy;
- if (opt & NGRAM_LOWERING) {
-- su2_copy = su2;
-- mkallsmall_utf(su2_copy, langnum);
-- p_su2 = &su2_copy;
-+ workbuf1 = su2;
-+ mkallsmall_utf(workbuf1, langnum);
-+ p_su2 = &workbuf1;
- }
- for (int j = 1; j <= n; j++) {
- ns = 0;
-@@ -1948,22 +1946,20 @@ int SuggestMgr::commoncharacterpositions(const char* s1,
- int diffpos[2];
- *is_swap = 0;
- if (utf8) {
-- wide::string su1;
-- wide::string su2;
-- int l1 = u8_u16(su1, s1);
-- int l2 = u8_u16(su2, s2);
-+ int l1 = u8_u16(workbuf1, s1);
-+ int l2 = u8_u16(workbuf2, s2);
-
- if (l1 <= 0 || l2 <= 0)
- return 0;
-
- // decapitalize dictionary word
- if (complexprefixes) {
-- su2[l2 - 1] = lower_utf(su2[l2 - 1], langnum);
-+ workbuf2[l2 - 1] = lower_utf(workbuf2[l2 - 1], langnum);
- } else {
-- su2[0] = lower_utf(su2[0], langnum);
-+ workbuf2[0] = lower_utf(workbuf2[0], langnum);
- }
- for (int i = 0; (i < l1) && (i < l2); i++) {
-- if (su1[i] == su2[i]) {
-+ if (workbuf1[i] == workbuf2[i]) {
- num++;
- } else {
- if (diff < 2)
-@@ -1972,8 +1968,8 @@ int SuggestMgr::commoncharacterpositions(const char* s1,
- }
- }
- if ((diff == 2) && (l1 == l2) &&
-- (su1[diffpos[0]] == su2[diffpos[1]]) &&
-- (su1[diffpos[1]] == su2[diffpos[0]]))
-+ (workbuf1[diffpos[0]] == workbuf2[diffpos[1]]) &&
-+ (workbuf1[diffpos[1]] == workbuf2[diffpos[0]]))
- *is_swap = 1;
- } else {
- size_t i;
-@@ -2004,8 +2000,7 @@ int SuggestMgr::commoncharacterpositions(const char* s1,
-
- int SuggestMgr::mystrlen(const char* word) {
- if (utf8) {
-- wide::string w;
-- return u8_u16(w, word);
-+ return u8_u16(workbuf1, word);
- } else
- return strlen(word);
- }
-@@ -2044,15 +2039,13 @@ void SuggestMgr::lcs(const char* s,
- int* l2,
- char** result) {
- int n, m;
-- wide::string su;
-- wide::string su2;
- char* b;
- char* c;
- int i;
- int j;
- if (utf8) {
-- m = u8_u16(su, s);
-- n = u8_u16(su2, s2);
-+ m = u8_u16(workbuf1, s);
-+ n = u8_u16(workbuf2, s2);
- } else {
- m = strlen(s);
- n = strlen(s2);
-@@ -2073,7 +2066,7 @@ void SuggestMgr::lcs(const char* s,
- c[j] = 0;
- for (i = 1; i <= m; i++) {
- for (j = 1; j <= n; j++) {
-- if (((utf8) && (su[i - 1] == su2[j - 1])) ||
-+ if (((utf8) && (workbuf1[i - 1] == workbuf2[j - 1])) ||
- ((!utf8) && (s[i - 1] == s2[j - 1]))) {
- c[i * (n + 1) + j] = c[(i - 1) * (n + 1) + j - 1] + 1;
- b[i * (n + 1) + j] = LCS_UPLEFT;
-diff --git a/src/hunspell/suggestmgr.hxx b/src/hunspell/suggestmgr.hxx
-index 9bfa933..80b0fd7 100644
---- a/src/hunspell/suggestmgr.hxx
-+++ b/src/hunspell/suggestmgr.hxx
-@@ -105,6 +105,9 @@ class SuggestMgr {
- SuggestMgr& operator=(const SuggestMgr&);
-
- private:
-+ mutable wide::string workbuf1;
-+ mutable wide::string workbuf2;
-+
- char* ckey;
- size_t ckeyl;
- wide::string ckey_utf;
---
-2.9.3
-
diff --git a/external/hunspell/UnpackedTarball_hunspell.mk b/external/hunspell/UnpackedTarball_hunspell.mk
index e493366..b30bd08 100644
--- a/external/hunspell/UnpackedTarball_hunspell.mk
+++ b/external/hunspell/UnpackedTarball_hunspell.mk
@@ -22,10 +22,12 @@ $(eval $(call gb_UnpackedTarball_set_patchlevel,hunspell,1))
$(eval $(call gb_UnpackedTarball_add_patches,hunspell, \
external/hunspell/0001-Revert-Remove-autotools-autogenerated-files.patch \
external/hunspell/0001-unroll-this-a-bit.patch \
- external/hunspell/0002-rename-std-vector-w_char-to-wide-string.patch \
- external/hunspell/0003-Related-hunspell-406-use-a-basic_string-w_char-inste.patch \
- external/hunspell/0004-use-a-per-hashmgr-persistent-wide-string-scratch-buf.patch \
- external/hunspell/0005-use-a-per-hashmgr-persistent-wide-string-scratch-buf.patch \
+ external/hunspell/0001-cppcheck-redundant-c_str.patch \
+ external/hunspell/0001-cppcheck-rv-is-reassigned-before-old-value-used.patch \
+ external/hunspell/0001-loop-via-iterators.patch \
+ external/hunspell/0002-add-a-get_clen_and_captype-varient-that-takes-a-buff.patch \
+ external/hunspell/0003-hoist-string-lowering-from-ngram-to-ngsuggest.patch \
+ external/hunspell/0004-either-clear-will-be-called-anyway-before-use-or-its.patch \
))
# vim: set noet sw=4 ts=4:
More information about the Libreoffice-commits
mailing list