[Libreoffice-commits] .: 2 commits - hyphen/hyphen-2.7.1-2.8.2.patch hyphen/hyphen-2.7.1-2.8.3.patch hyphen/makefile.mk
Caolán McNamara
caolan at kemper.freedesktop.org
Mon Oct 24 07:39:26 PDT 2011
hyphen/hyphen-2.7.1-2.8.2.patch | 325 --------------------------------
hyphen/hyphen-2.7.1-2.8.3.patch | 399 ++++++++++++++++++++++++++++++++++++++++
hyphen/makefile.mk | 2
3 files changed, 400 insertions(+), 326 deletions(-)
New commits:
commit e894342d6bdbe7cf6b3f93c6e66d6988e3fda907
Author: Caolán McNamara <caolanm at redhat.com>
Date: Mon Oct 24 15:38:01 2011 +0100
fix up hyphen 2.8.2/2.8.3 conflicts
diff --git a/hyphen/hyphen-2.7.1-2.8.2.patch b/hyphen/hyphen-2.7.1-2.8.2.patch
deleted file mode 100644
index 912fba7..0000000
--- a/hyphen/hyphen-2.7.1-2.8.2.patch
+++ /dev/null
@@ -1,325 +0,0 @@
---- misc/build/hyphen-2.7.1/hyphen.c.old 2011-10-07 15:51:25.883686906 +0200
-+++ misc/build/hyphen-2.7.1/hyphen.c 2011-10-07 15:51:59.363686900 +0200
-@@ -242,99 +242,45 @@
- }
- #endif
-
--HyphenDict *
--hnj_hyphen_load (const char *fn)
--{
-- HyphenDict *dict[2];
-- HashTab *hashtab;
-- FILE *f;
-- char buf[MAX_CHARS];
-+void hnj_hyphen_load_line(char * buf, HyphenDict * dict, HashTab * hashtab) {
-+ int i, j;
- char word[MAX_CHARS];
- char pattern[MAX_CHARS];
- char * repl;
- signed char replindex;
- signed char replcut;
-- int state_num = 0, last_state;
-- int i, j, k;
-+ int state_num = 0;
-+ int last_state;
- char ch;
- int found;
-- HashEntry *e;
-- int nextlevel = 0;
--
-- f = fopen (fn, "r");
-- if (f == NULL)
-- return NULL;
-
--// loading one or two dictionaries (separated by NEXTLEVEL keyword)
--for (k = 0; k == 0 || (k == 1 && nextlevel); k++) {
-- hashtab = hnj_hash_new ();
--#ifdef VERBOSE
-- global = hashtab;
--#endif
-- hnj_hash_insert (hashtab, "", 0);
-- dict[k] = hnj_malloc (sizeof(HyphenDict));
-- dict[k]->num_states = 1;
-- dict[k]->states = hnj_malloc (sizeof(HyphenState));
-- dict[k]->states[0].match = NULL;
-- dict[k]->states[0].repl = NULL;
-- dict[k]->states[0].fallback_state = -1;
-- dict[k]->states[0].num_trans = 0;
-- dict[k]->states[0].trans = NULL;
-- dict[k]->nextlevel = NULL;
-- dict[k]->lhmin = 0;
-- dict[k]->rhmin = 0;
-- dict[k]->clhmin = 0;
-- dict[k]->crhmin = 0;
-- dict[k]->nohyphen = NULL;
-- dict[k]->nohyphenl = 0;
--
-- /* read in character set info */
-- if (k == 0) {
-- for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0;
-- fgets(dict[k]->cset, sizeof(dict[k]->cset),f);
-- for (i=0;i<MAX_NAME;i++)
-- if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n'))
-- dict[k]->cset[i] = 0;
-- dict[k]->utf8 = (strcmp(dict[k]->cset, "UTF-8") == 0);
-- } else {
-- strcpy(dict[k]->cset, dict[0]->cset);
-- dict[k]->utf8 = dict[0]->utf8;
-- }
--
-- while (fgets (buf, sizeof(buf), f) != NULL)
-- {
-- if (buf[0] != '%')
-- {
-- if (strncmp(buf, "NEXTLEVEL", 9) == 0) {
-- nextlevel = 1;
-- break;
-- } else if (strncmp(buf, "LEFTHYPHENMIN", 13) == 0) {
-- dict[k]->lhmin = atoi(buf + 13);
-- continue;
-+ if (strncmp(buf, "LEFTHYPHENMIN", 13) == 0) {
-+ dict->lhmin = atoi(buf + 13);
-+ return;
- } else if (strncmp(buf, "RIGHTHYPHENMIN", 14) == 0) {
-- dict[k]->rhmin = atoi(buf + 14);
-- continue;
-+ dict->rhmin = atoi(buf + 14);
-+ return;
- } else if (strncmp(buf, "COMPOUNDLEFTHYPHENMIN", 21) == 0) {
-- dict[k]->clhmin = atoi(buf + 21);
-- continue;
-+ dict->clhmin = atoi(buf + 21);
-+ return;
- } else if (strncmp(buf, "COMPOUNDRIGHTHYPHENMIN", 22) == 0) {
-- dict[k]->crhmin = atoi(buf + 22);
-- continue;
-+ dict->crhmin = atoi(buf + 22);
-+ return;
- } else if (strncmp(buf, "NOHYPHEN", 8) == 0) {
- char * space = buf + 8;
- while (*space != '\0' && (*space == ' ' || *space == '\t')) space++;
-- if (*buf != '\0') dict[k]->nohyphen = hnj_strdup(space);
-- if (dict[k]->nohyphen) {
-- char * nhe = dict[k]->nohyphen + strlen(dict[k]->nohyphen) - 1;
-+ if (*buf != '\0') dict->nohyphen = hnj_strdup(space);
-+ if (dict->nohyphen) {
-+ char * nhe = dict->nohyphen + strlen(dict->nohyphen) - 1;
- *nhe = 0;
-- for (nhe = nhe - 1; nhe > dict[k]->nohyphen; nhe--) {
-+ for (nhe = nhe - 1; nhe > dict->nohyphen; nhe--) {
- if (*nhe == ',') {
-- dict[k]->nohyphenl++;
-+ dict->nohyphenl++;
- *nhe = 0;
- }
- }
- }
-- continue;
-+ return;
- }
- j = 0;
- pattern[j] = '0';
-@@ -379,7 +325,7 @@
- } else {
- if (*word == '.') i++;
- /* convert UTF-8 char. positions of discretionary hyph. replacements to 8-bit */
-- if (dict[k]->utf8) {
-+ if (dict->utf8) {
- int pu = -1; /* unicode character position */
- int ps = -1; /* unicode start position (original replindex) */
- int pc = (*word == '.') ? 1: 0; /* 8-bit character position */
-@@ -403,14 +349,14 @@
- printf ("word %s pattern %s, j = %d repl: %s\n", word, pattern + i, j, repl);
- #endif
- found = hnj_hash_lookup (hashtab, word);
-- state_num = hnj_get_state (dict[k], hashtab, word);
-- dict[k]->states[state_num].match = hnj_strdup (pattern + i);
-- dict[k]->states[state_num].repl = repl;
-- dict[k]->states[state_num].replindex = replindex;
-+ state_num = hnj_get_state (dict, hashtab, word);
-+ dict->states[state_num].match = hnj_strdup (pattern + i);
-+ dict->states[state_num].repl = repl;
-+ dict->states[state_num].replindex = replindex;
- if (!replcut) {
-- dict[k]->states[state_num].replcut = (signed char) strlen(word);
-+ dict->states[state_num].replcut = (signed char) strlen(word);
- } else {
-- dict[k]->states[state_num].replcut = replcut;
-+ dict->states[state_num].replcut = replcut;
- }
-
- /* now, put in the prefix transitions */
-@@ -420,11 +366,81 @@
- ch = word[j - 1];
- word[j - 1] = '\0';
- found = hnj_hash_lookup (hashtab, word);
-- state_num = hnj_get_state (dict[k], hashtab, word);
-- hnj_add_trans (dict[k], state_num, last_state, ch);
-+ state_num = hnj_get_state (dict, hashtab, word);
-+ hnj_add_trans (dict, state_num, last_state, ch);
- }
-- }
-+}
-+
-+HyphenDict *
-+hnj_hyphen_load (const char *fn)
-+{
-+ HyphenDict *dict[2];
-+ HashTab *hashtab;
-+ FILE *f;
-+ char buf[MAX_CHARS];
-+ int nextlevel = 0;
-+ int i, j, k;
-+ HashEntry *e;
-+ int state_num = 0;
-+
-+ f = fopen (fn, "r");
-+ if (f == NULL)
-+ return NULL;
-+
-+// loading one or two dictionaries (separated by NEXTLEVEL keyword)
-+for (k = 0; k < 2; k++) {
-+ hashtab = hnj_hash_new ();
-+#ifdef VERBOSE
-+ global = hashtab;
-+#endif
-+ hnj_hash_insert (hashtab, "", 0);
-+ dict[k] = hnj_malloc (sizeof(HyphenDict));
-+ dict[k]->num_states = 1;
-+ dict[k]->states = hnj_malloc (sizeof(HyphenState));
-+ dict[k]->states[0].match = NULL;
-+ dict[k]->states[0].repl = NULL;
-+ dict[k]->states[0].fallback_state = -1;
-+ dict[k]->states[0].num_trans = 0;
-+ dict[k]->states[0].trans = NULL;
-+ dict[k]->nextlevel = NULL;
-+ dict[k]->lhmin = 0;
-+ dict[k]->rhmin = 0;
-+ dict[k]->clhmin = 0;
-+ dict[k]->crhmin = 0;
-+ dict[k]->nohyphen = NULL;
-+ dict[k]->nohyphenl = 0;
-+
-+ /* read in character set info */
-+ if (k == 0) {
-+ for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0;
-+ fgets(dict[k]->cset, sizeof(dict[k]->cset),f);
-+ for (i=0;i<MAX_NAME;i++)
-+ if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n'))
-+ dict[k]->cset[i] = 0;
-+ dict[k]->utf8 = (strcmp(dict[k]->cset, "UTF-8") == 0);
-+ } else {
-+ strcpy(dict[k]->cset, dict[0]->cset);
-+ dict[k]->utf8 = dict[0]->utf8;
-+ }
-+
-+ if (k == 0 || nextlevel) {
-+ while (fgets (buf, sizeof(buf), f) != NULL) {
-+ if (strncmp(buf, "NEXTLEVEL", 9) == 0) {
-+ nextlevel = 1;
-+ break;
-+ } else if (buf[0] != '%') hnj_hyphen_load_line(buf, dict[k], hashtab);
- }
-+ } else if (k == 1) {
-+ /* default first level: hyphen and ASCII apostrophe */
-+ if (!dict[0]->utf8) hnj_hyphen_load_line("NOHYPHEN -,'\n", dict[k], hashtab);
-+ else hnj_hyphen_load_line("NOHYPHEN -,',\xe2\x80\x93,\xe2\x80\x99\n", dict[k], hashtab);
-+ hnj_hyphen_load_line("1-1\n", dict[k], hashtab); /* hyphen */
-+ hnj_hyphen_load_line("1'1\n", dict[k], hashtab); /* ASCII apostrophe */
-+ if (dict[0]->utf8) {
-+ hnj_hyphen_load_line("1\xe2\x80\x93" "1\n", dict[k], hashtab); /* endash */
-+ hnj_hyphen_load_line("1\xe2\x80\x99" "1\n", dict[k], hashtab); /* apostrophe */
-+ }
-+ }
-
- /* Could do unioning of matches here (instead of the preprocessor script).
- If we did, the pseudocode would look something like this:
-@@ -476,7 +492,15 @@
- state_num = 0;
- }
- fclose(f);
-- if (k == 2) dict[0]->nextlevel = dict[1];
-+ if (nextlevel) dict[0]->nextlevel = dict[1];
-+ else {
-+ dict[1] -> nextlevel = dict[0];
-+ dict[1]->lhmin = dict[0]->lhmin;
-+ dict[1]->rhmin = dict[0]->rhmin;
-+ dict[1]->clhmin = (dict[0]->clhmin) ? dict[0]->clhmin : ((dict[0]->lhmin) ? dict[0]->lhmin : 2);
-+ dict[1]->crhmin = (dict[0]->crhmin) ? dict[0]->crhmin : ((dict[0]->rhmin) ? dict[0]->rhmin : 2);
-+ return dict[1];
-+ }
- return dict[0];
- }
-
-@@ -527,8 +551,13 @@
- j = 0;
- prep_word[j++] = '.';
-
-- for (i = 0; i < word_size; i++)
-+ for (i = 0; i < word_size; i++) {
-+ if (word[i] <= '9' && word[i] >= '0') {
-+ prep_word[j++] = '.';
-+ } else {
- prep_word[j++] = word[i];
-+ }
-+ }
-
- prep_word[j++] = '.';
- prep_word[j] = '\0';
-@@ -670,6 +699,9 @@
- i += hnj_ligature(word[2]);
- }
-
-+ // ignore numbers
-+ for (j = 0; word[j] <= '9' && word[j] >= '0'; j++) i--;
-+
- for (j = 0; i < lhmin && word[j] != '\0'; i++) do {
- // check length of the non-standard part
- if (*rep && *pos && *cut && (*rep)[j]) {
-@@ -696,9 +728,13 @@
- int hnj_hyphen_rhmin(int utf8, const char *word, int word_size, char * hyphens,
- char *** rep, int ** pos, int ** cut, int rhmin)
- {
-- int i;
-- int j = word_size - 2;
-- for (i = 1; i < rhmin && j > 0; j--) {
-+ int i = 1;
-+ int j;
-+
-+ // ignore numbers
-+ for (j = word_size - 1; j > 0 && word[j] <= '9' && word[j] >= '0'; j--) i--;
-+
-+ for (j = word_size - 2; i < rhmin && j > 0; j--) {
- // check length of the non-standard part
- if (*rep && *pos && *cut && (*rep)[j]) {
- char * rh = strchr((*rep)[j], '=');
-@@ -756,8 +792,15 @@
- j = 0;
- prep_word[j++] = '.';
-
-- for (i = 0; i < word_size; i++)
-+ for (i = 0; i < word_size; i++) {
-+ if (word[i] <= '9' && word[i] >= '0') {
-+ prep_word[j++] = '.';
-+ } else {
- prep_word[j++] = word[i];
-+ }
-+ }
-+
-+
-
- prep_word[j++] = '.';
- prep_word[j] = '\0';
-@@ -1093,8 +1136,10 @@
- char *hyphword, char *** rep, int ** pos, int ** cut,
- int lhmin, int rhmin, int clhmin, int crhmin)
- {
-- lhmin = (lhmin > 0 ? lhmin : dict->lhmin);
-- rhmin = (rhmin > 0 ? rhmin : dict->rhmin);
-+ lhmin = (lhmin > dict->lhmin) ? lhmin : dict->lhmin;
-+ rhmin = (rhmin > dict->rhmin) ? rhmin : dict->rhmin;
-+ clhmin = (clhmin > dict->clhmin) ? clhmin : dict->clhmin;
-+ crhmin = (crhmin > dict->crhmin) ? crhmin : dict->crhmin;
- hnj_hyphen_hyph_(dict, word, word_size, hyphens, rep, pos, cut,
- clhmin, crhmin, 1, 1);
- hnj_hyphen_lhmin(dict->utf8, word, word_size, hyphens,
diff --git a/hyphen/makefile.mk b/hyphen/makefile.mk
index 06cfc58..1cb1a10 100644
--- a/hyphen/makefile.mk
+++ b/hyphen/makefile.mk
@@ -44,7 +44,7 @@ ADDITIONAL_FILES += makefile.mk
PATCH_FILES= \
hyphen-2.7.1.patch \
hyphen-2.7.1-read-charset.patch \
- hyphen-2.7.1-2.8.2.patch
+ hyphen-2.7.1-2.8.3.patch
.IF "$(GUI)"=="UNX"
CONFIGURE_DIR=$(BUILD_DIR)
commit 161a463b814d18ec74c3829f9ae35f635725356c
Author: László Németh <nemeth at numbertext.org>
Date: Mon Oct 10 16:22:13 2011 +0200
Add fixes of Hyphen 2.8.3
diff --git a/hyphen/hyphen-2.7.1-2.8.3.patch b/hyphen/hyphen-2.7.1-2.8.3.patch
new file mode 100644
index 0000000..047ce13
--- /dev/null
+++ b/hyphen/hyphen-2.7.1-2.8.3.patch
@@ -0,0 +1,399 @@
+--- misc/build/hyphen-2.7.1/hyphen.c.old 2011-10-10 15:58:33.317260138 +0200
++++ misc/build/hyphen-2.7.1/hyphen.c 2011-10-10 15:58:55.221260136 +0200
+@@ -226,115 +226,61 @@
+ }
+
+ #ifdef VERBOSE
+-HashTab *global;
++HashTab *global[1];
+
+ static char *
+-get_state_str (int state)
++get_state_str (int state, int level)
+ {
+ int i;
+ HashEntry *e;
+
+ for (i = 0; i < HASH_SIZE; i++)
+- for (e = global->entries[i]; e; e = e->next)
++ for (e = global[level]->entries[i]; e; e = e->next)
+ if (e->val == state)
+ return e->key;
+ return NULL;
+ }
+ #endif
+
+-HyphenDict *
+-hnj_hyphen_load (const char *fn)
+-{
+- HyphenDict *dict[2];
+- HashTab *hashtab;
+- FILE *f;
+- char buf[MAX_CHARS];
++void hnj_hyphen_load_line(char * buf, HyphenDict * dict, HashTab * hashtab) {
++ int i, j;
+ char word[MAX_CHARS];
+ char pattern[MAX_CHARS];
+ char * repl;
+ signed char replindex;
+ signed char replcut;
+- int state_num = 0, last_state;
+- int i, j, k;
++ int state_num = 0;
++ int last_state;
+ char ch;
+ int found;
+- HashEntry *e;
+- int nextlevel = 0;
+-
+- f = fopen (fn, "r");
+- if (f == NULL)
+- return NULL;
+-
+-// loading one or two dictionaries (separated by NEXTLEVEL keyword)
+-for (k = 0; k == 0 || (k == 1 && nextlevel); k++) {
+- hashtab = hnj_hash_new ();
+-#ifdef VERBOSE
+- global = hashtab;
+-#endif
+- hnj_hash_insert (hashtab, "", 0);
+- dict[k] = hnj_malloc (sizeof(HyphenDict));
+- dict[k]->num_states = 1;
+- dict[k]->states = hnj_malloc (sizeof(HyphenState));
+- dict[k]->states[0].match = NULL;
+- dict[k]->states[0].repl = NULL;
+- dict[k]->states[0].fallback_state = -1;
+- dict[k]->states[0].num_trans = 0;
+- dict[k]->states[0].trans = NULL;
+- dict[k]->nextlevel = NULL;
+- dict[k]->lhmin = 0;
+- dict[k]->rhmin = 0;
+- dict[k]->clhmin = 0;
+- dict[k]->crhmin = 0;
+- dict[k]->nohyphen = NULL;
+- dict[k]->nohyphenl = 0;
+-
+- /* read in character set info */
+- if (k == 0) {
+- for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0;
+- fgets(dict[k]->cset, sizeof(dict[k]->cset),f);
+- for (i=0;i<MAX_NAME;i++)
+- if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n'))
+- dict[k]->cset[i] = 0;
+- dict[k]->utf8 = (strcmp(dict[k]->cset, "UTF-8") == 0);
+- } else {
+- strcpy(dict[k]->cset, dict[0]->cset);
+- dict[k]->utf8 = dict[0]->utf8;
+- }
+
+- while (fgets (buf, sizeof(buf), f) != NULL)
+- {
+- if (buf[0] != '%')
+- {
+- if (strncmp(buf, "NEXTLEVEL", 9) == 0) {
+- nextlevel = 1;
+- break;
+- } else if (strncmp(buf, "LEFTHYPHENMIN", 13) == 0) {
+- dict[k]->lhmin = atoi(buf + 13);
+- continue;
++ if (strncmp(buf, "LEFTHYPHENMIN", 13) == 0) {
++ dict->lhmin = atoi(buf + 13);
++ return;
+ } else if (strncmp(buf, "RIGHTHYPHENMIN", 14) == 0) {
+- dict[k]->rhmin = atoi(buf + 14);
+- continue;
++ dict->rhmin = atoi(buf + 14);
++ return;
+ } else if (strncmp(buf, "COMPOUNDLEFTHYPHENMIN", 21) == 0) {
+- dict[k]->clhmin = atoi(buf + 21);
+- continue;
++ dict->clhmin = atoi(buf + 21);
++ return;
+ } else if (strncmp(buf, "COMPOUNDRIGHTHYPHENMIN", 22) == 0) {
+- dict[k]->crhmin = atoi(buf + 22);
+- continue;
++ dict->crhmin = atoi(buf + 22);
++ return;
+ } else if (strncmp(buf, "NOHYPHEN", 8) == 0) {
+ char * space = buf + 8;
+ while (*space != '\0' && (*space == ' ' || *space == '\t')) space++;
+- if (*buf != '\0') dict[k]->nohyphen = hnj_strdup(space);
+- if (dict[k]->nohyphen) {
+- char * nhe = dict[k]->nohyphen + strlen(dict[k]->nohyphen) - 1;
++ if (*buf != '\0') dict->nohyphen = hnj_strdup(space);
++ if (dict->nohyphen) {
++ char * nhe = dict->nohyphen + strlen(dict->nohyphen) - 1;
+ *nhe = 0;
+- for (nhe = nhe - 1; nhe > dict[k]->nohyphen; nhe--) {
++ for (nhe = nhe - 1; nhe > dict->nohyphen; nhe--) {
+ if (*nhe == ',') {
+- dict[k]->nohyphenl++;
++ dict->nohyphenl++;
+ *nhe = 0;
+ }
+ }
+ }
+- continue;
++ return;
+ }
+ j = 0;
+ pattern[j] = '0';
+@@ -379,7 +325,7 @@
+ } else {
+ if (*word == '.') i++;
+ /* convert UTF-8 char. positions of discretionary hyph. replacements to 8-bit */
+- if (dict[k]->utf8) {
++ if (dict->utf8) {
+ int pu = -1; /* unicode character position */
+ int ps = -1; /* unicode start position (original replindex) */
+ int pc = (*word == '.') ? 1: 0; /* 8-bit character position */
+@@ -403,14 +349,14 @@
+ printf ("word %s pattern %s, j = %d repl: %s\n", word, pattern + i, j, repl);
+ #endif
+ found = hnj_hash_lookup (hashtab, word);
+- state_num = hnj_get_state (dict[k], hashtab, word);
+- dict[k]->states[state_num].match = hnj_strdup (pattern + i);
+- dict[k]->states[state_num].repl = repl;
+- dict[k]->states[state_num].replindex = replindex;
++ state_num = hnj_get_state (dict, hashtab, word);
++ dict->states[state_num].match = hnj_strdup (pattern + i);
++ dict->states[state_num].repl = repl;
++ dict->states[state_num].replindex = replindex;
+ if (!replcut) {
+- dict[k]->states[state_num].replcut = (signed char) strlen(word);
++ dict->states[state_num].replcut = (signed char) strlen(word);
+ } else {
+- dict[k]->states[state_num].replcut = replcut;
++ dict->states[state_num].replcut = replcut;
+ }
+
+ /* now, put in the prefix transitions */
+@@ -420,11 +366,82 @@
+ ch = word[j - 1];
+ word[j - 1] = '\0';
+ found = hnj_hash_lookup (hashtab, word);
+- state_num = hnj_get_state (dict[k], hashtab, word);
+- hnj_add_trans (dict[k], state_num, last_state, ch);
++ state_num = hnj_get_state (dict, hashtab, word);
++ hnj_add_trans (dict, state_num, last_state, ch);
+ }
+- }
++}
++
++HyphenDict *
++hnj_hyphen_load (const char *fn)
++{
++ HyphenDict *dict[2];
++ HashTab *hashtab;
++ FILE *f;
++ char buf[MAX_CHARS];
++ int nextlevel = 0;
++ int i, j, k;
++ HashEntry *e;
++ int state_num = 0;
++
++ f = fopen (fn, "r");
++ if (f == NULL)
++ return NULL;
++
++// loading one or two dictionaries (separated by NEXTLEVEL keyword)
++for (k = 0; k < 2; k++) {
++ hashtab = hnj_hash_new ();
++#ifdef VERBOSE
++ global[k] = hashtab;
++#endif
++ hnj_hash_insert (hashtab, "", 0);
++ dict[k] = hnj_malloc (sizeof(HyphenDict));
++ dict[k]->num_states = 1;
++ dict[k]->states = hnj_malloc (sizeof(HyphenState));
++ dict[k]->states[0].match = NULL;
++ dict[k]->states[0].repl = NULL;
++ dict[k]->states[0].fallback_state = -1;
++ dict[k]->states[0].num_trans = 0;
++ dict[k]->states[0].trans = NULL;
++ dict[k]->nextlevel = NULL;
++ dict[k]->lhmin = 0;
++ dict[k]->rhmin = 0;
++ dict[k]->clhmin = 0;
++ dict[k]->crhmin = 0;
++ dict[k]->nohyphen = NULL;
++ dict[k]->nohyphenl = 0;
++
++ /* read in character set info */
++ if (k == 0) {
++ for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0;
++ fgets(dict[k]->cset, sizeof(dict[k]->cset),f);
++ for (i=0;i<MAX_NAME;i++)
++ if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n'))
++ dict[k]->cset[i] = 0;
++ dict[k]->utf8 = (strcmp(dict[k]->cset, "UTF-8") == 0);
++ } else {
++ strcpy(dict[k]->cset, dict[0]->cset);
++ dict[k]->utf8 = dict[0]->utf8;
++ }
++
++ if (k == 0 || nextlevel) {
++ while (fgets (buf, sizeof(buf), f) != NULL) {
++ if (strncmp(buf, "NEXTLEVEL", 9) == 0) {
++ nextlevel = 1;
++ break;
++ } else if (buf[0] != '%') hnj_hyphen_load_line(buf, dict[k], hashtab);
++ }
++ } else if (k == 1) {
++ /* default first level: hyphen and ASCII apostrophe */
++ if (!dict[0]->utf8) hnj_hyphen_load_line("NOHYPHEN '\n", dict[k], hashtab);
++ else hnj_hyphen_load_line("NOHYPHEN ',\xe2\x80\x93,\xe2\x80\x99\n", dict[k], hashtab);
++ strcpy(buf, "1-1/=,1,1\n"); // buf rewritten by hnj_hyphen_load here
++ hnj_hyphen_load_line(buf, dict[k], hashtab); /* remove hyphen */
++ hnj_hyphen_load_line("1'1\n", dict[k], hashtab); /* ASCII apostrophe */
++ if (dict[0]->utf8) {
++ hnj_hyphen_load_line("1\xe2\x80\x93" "1\n", dict[k], hashtab); /* endash */
++ hnj_hyphen_load_line("1\xe2\x80\x99" "1\n", dict[k], hashtab); /* apostrophe */
+ }
++ }
+
+ /* Could do unioning of matches here (instead of the preprocessor script).
+ If we did, the pseudocode would look something like this:
+@@ -476,7 +493,20 @@
+ state_num = 0;
+ }
+ fclose(f);
+- if (k == 2) dict[0]->nextlevel = dict[1];
++ if (nextlevel) dict[0]->nextlevel = dict[1];
++ else {
++ dict[1] -> nextlevel = dict[0];
++ dict[1]->lhmin = dict[0]->lhmin;
++ dict[1]->rhmin = dict[0]->rhmin;
++ dict[1]->clhmin = (dict[0]->clhmin) ? dict[0]->clhmin : ((dict[0]->lhmin) ? dict[0]->lhmin : 3);
++ dict[1]->crhmin = (dict[0]->crhmin) ? dict[0]->crhmin : ((dict[0]->rhmin) ? dict[0]->rhmin : 3);
++#ifdef VERBOSE
++ HashTab *r = global[0];
++ global[0] = global[1];
++ global[1] = r;
++#endif
++ return dict[1];
++ }
+ return dict[0];
+ }
+
+@@ -527,8 +557,13 @@
+ j = 0;
+ prep_word[j++] = '.';
+
+- for (i = 0; i < word_size; i++)
++ for (i = 0; i < word_size; i++) {
++ if (word[i] <= '9' && word[i] >= '0') {
++ prep_word[j++] = '.';
++ } else {
+ prep_word[j++] = word[i];
++ }
++ }
+
+ prep_word[j++] = '.';
+ prep_word[j] = '\0';
+@@ -557,7 +592,7 @@
+
+ #ifdef VERBOSE
+ char *state_str;
+- state_str = get_state_str (state);
++ state_str = get_state_str (state, 0);
+
+ for (k = 0; k < i - strlen (state_str); k++)
+ putchar (' ');
+@@ -670,6 +705,9 @@
+ i += hnj_ligature(word[2]);
+ }
+
++ // ignore numbers
++ for (j = 0; word[j] <= '9' && word[j] >= '0'; j++) i--;
++
+ for (j = 0; i < lhmin && word[j] != '\0'; i++) do {
+ // check length of the non-standard part
+ if (*rep && *pos && *cut && (*rep)[j]) {
+@@ -696,9 +734,13 @@
+ int hnj_hyphen_rhmin(int utf8, const char *word, int word_size, char * hyphens,
+ char *** rep, int ** pos, int ** cut, int rhmin)
+ {
+- int i;
+- int j = word_size - 2;
+- for (i = 1; i < rhmin && j > 0; j--) {
++ int i = 1;
++ int j;
++
++ // ignore numbers
++ for (j = word_size - 1; j > 0 && word[j] <= '9' && word[j] >= '0'; j--) i--;
++
++ for (j = word_size - 2; i < rhmin && j > 0; j--) {
+ // check length of the non-standard part
+ if (*rep && *pos && *cut && (*rep)[j]) {
+ char * rh = strchr((*rep)[j], '=');
+@@ -756,8 +798,15 @@
+ j = 0;
+ prep_word[j++] = '.';
+
+- for (i = 0; i < word_size; i++)
++ for (i = 0; i < word_size; i++) {
++ if (word[i] <= '9' && word[i] >= '0') {
++ prep_word[j++] = '.';
++ } else {
+ prep_word[j++] = word[i];
++ }
++ }
++
++
+
+ prep_word[j++] = '.';
+ prep_word[j] = '\0';
+@@ -786,7 +835,7 @@
+
+ #ifdef VERBOSE
+ char *state_str;
+- state_str = get_state_str (state);
++ state_str = get_state_str (state, 1);
+
+ for (k = 0; k < i - strlen (state_str); k++)
+ putchar (' ');
+@@ -1033,6 +1082,9 @@
+ }
+ }
+ hyphens[j + 1] = '\0';
++#ifdef VERBOSE
++ printf ("nums: %s\n", hyphens);
++#endif
+ return 0;
+ }
+
+@@ -1074,8 +1126,8 @@
+ for (nhi = 0; nhi <= dict->nohyphenl; nhi++) {
+ char * nhy = (char *) strstr(word, nh);
+ while (nhy) {
+- hyphens[nhy - word + strlen(nh) - 1] = 0;
+- if (nhy - word - 1 >= 0) hyphens[nhy - word - 1] = 0;
++ hyphens[nhy - word + strlen(nh) - 1] = '0';
++ if (nhy - word - 1 >= 0) hyphens[nhy - word - 1] = '0';
+ nhy = (char *) strstr(nhy + 1, nh);
+ }
+ nh = nh + strlen(nh) + 1;
+@@ -1084,6 +1136,9 @@
+
+ if (hyphword) hnj_hyphen_hyphword(word, word_size, hyphens, hyphword, rep, pos, cut);
+ if (dict->utf8) return hnj_hyphen_norm(word, word_size, hyphens, rep, pos, cut);
++#ifdef VERBOSE
++ printf ("nums: %s\n", hyphens);
++#endif
+ return 0;
+ }
+
+@@ -1093,8 +1148,10 @@
+ char *hyphword, char *** rep, int ** pos, int ** cut,
+ int lhmin, int rhmin, int clhmin, int crhmin)
+ {
+- lhmin = (lhmin > 0 ? lhmin : dict->lhmin);
+- rhmin = (rhmin > 0 ? rhmin : dict->rhmin);
++ lhmin = (lhmin > dict->lhmin) ? lhmin : dict->lhmin;
++ rhmin = (rhmin > dict->rhmin) ? rhmin : dict->rhmin;
++ clhmin = (clhmin > dict->clhmin) ? clhmin : dict->clhmin;
++ crhmin = (crhmin > dict->crhmin) ? crhmin : dict->crhmin;
+ hnj_hyphen_hyph_(dict, word, word_size, hyphens, rep, pos, cut,
+ clhmin, crhmin, 1, 1);
+ hnj_hyphen_lhmin(dict->utf8, word, word_size, hyphens,
More information about the Libreoffice-commits mailing list