[Libreoffice-commits] .: 2 commits - hyphen/hyphen-2.7.1-2.8.2.patch hyphen/hyphen-2.7.1-2.8.3.patch hyphen/makefile.mk

Caolán McNamara caolan at kemper.freedesktop.org
Mon Oct 24 07:39:26 PDT 2011


 hyphen/hyphen-2.7.1-2.8.2.patch |  325 --------------------------------
 hyphen/hyphen-2.7.1-2.8.3.patch |  399 ++++++++++++++++++++++++++++++++++++++++
 hyphen/makefile.mk              |    2 
 3 files changed, 400 insertions(+), 326 deletions(-)

New commits:
commit e894342d6bdbe7cf6b3f93c6e66d6988e3fda907
Author: Caolán McNamara <caolanm at redhat.com>
Date:   Mon Oct 24 15:38:01 2011 +0100

    fix up hyphen 2.8.2/2.8.3 conflicts

diff --git a/hyphen/hyphen-2.7.1-2.8.2.patch b/hyphen/hyphen-2.7.1-2.8.2.patch
deleted file mode 100644
index 912fba7..0000000
--- a/hyphen/hyphen-2.7.1-2.8.2.patch
+++ /dev/null
@@ -1,325 +0,0 @@
---- misc/build/hyphen-2.7.1/hyphen.c.old	2011-10-07 15:51:25.883686906 +0200
-+++ misc/build/hyphen-2.7.1/hyphen.c	2011-10-07 15:51:59.363686900 +0200
-@@ -242,99 +242,45 @@
- }
- #endif
- 
--HyphenDict *
--hnj_hyphen_load (const char *fn)
--{
--  HyphenDict *dict[2];
--  HashTab *hashtab;
--  FILE *f;
--  char buf[MAX_CHARS];
-+void hnj_hyphen_load_line(char * buf, HyphenDict * dict, HashTab * hashtab) {
-+  int i, j;
-   char word[MAX_CHARS];
-   char pattern[MAX_CHARS];
-   char * repl;
-   signed char replindex;
-   signed char replcut;
--  int state_num = 0, last_state;
--  int i, j, k;
-+  int state_num = 0;
-+  int last_state;
-   char ch;
-   int found;
--  HashEntry *e;
--  int nextlevel = 0;
--
--  f = fopen (fn, "r");
--  if (f == NULL)
--    return NULL;
- 
--// loading one or two dictionaries (separated by NEXTLEVEL keyword)
--for (k = 0; k == 0 || (k == 1 && nextlevel); k++) { 
--  hashtab = hnj_hash_new ();
--#ifdef VERBOSE
--  global = hashtab;
--#endif
--  hnj_hash_insert (hashtab, "", 0);
--  dict[k] = hnj_malloc (sizeof(HyphenDict));
--  dict[k]->num_states = 1;
--  dict[k]->states = hnj_malloc (sizeof(HyphenState));
--  dict[k]->states[0].match = NULL;
--  dict[k]->states[0].repl = NULL;
--  dict[k]->states[0].fallback_state = -1;
--  dict[k]->states[0].num_trans = 0;
--  dict[k]->states[0].trans = NULL;
--  dict[k]->nextlevel = NULL;
--  dict[k]->lhmin = 0;
--  dict[k]->rhmin = 0;
--  dict[k]->clhmin = 0;
--  dict[k]->crhmin = 0;
--  dict[k]->nohyphen = NULL;
--  dict[k]->nohyphenl = 0;
--
--  /* read in character set info */
--  if (k == 0) {
--    for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0;
--    fgets(dict[k]->cset,  sizeof(dict[k]->cset),f);
--    for (i=0;i<MAX_NAME;i++)
--      if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n'))
--        dict[k]->cset[i] = 0;
--    dict[k]->utf8 = (strcmp(dict[k]->cset, "UTF-8") == 0);
--  } else {
--    strcpy(dict[k]->cset, dict[0]->cset);
--    dict[k]->utf8 = dict[0]->utf8;
--  }
--
--  while (fgets (buf, sizeof(buf), f) != NULL)
--    {
--      if (buf[0] != '%')
--	{
--	  if (strncmp(buf, "NEXTLEVEL", 9) == 0) {
--	    nextlevel = 1;
--	    break;
--	  } else if (strncmp(buf, "LEFTHYPHENMIN", 13) == 0) {
--	    dict[k]->lhmin = atoi(buf + 13);
--	    continue;
-+	  if (strncmp(buf, "LEFTHYPHENMIN", 13) == 0) {
-+	    dict->lhmin = atoi(buf + 13);
-+	    return;
- 	  } else if (strncmp(buf, "RIGHTHYPHENMIN", 14) == 0) {
--	    dict[k]->rhmin = atoi(buf + 14);
--	    continue;
-+	    dict->rhmin = atoi(buf + 14);
-+	    return;
- 	  } else if (strncmp(buf, "COMPOUNDLEFTHYPHENMIN", 21) == 0) {
--	    dict[k]->clhmin = atoi(buf + 21);
--	    continue;
-+	    dict->clhmin = atoi(buf + 21);
-+	    return;
- 	  } else if (strncmp(buf, "COMPOUNDRIGHTHYPHENMIN", 22) == 0) {
--	    dict[k]->crhmin = atoi(buf + 22);
--	    continue;
-+	    dict->crhmin = atoi(buf + 22);
-+	    return;
- 	  } else if (strncmp(buf, "NOHYPHEN", 8) == 0) {
- 	    char * space = buf + 8;
- 	    while (*space != '\0' && (*space == ' ' || *space == '\t')) space++;
--	    if (*buf != '\0') dict[k]->nohyphen = hnj_strdup(space);
--	    if (dict[k]->nohyphen) {
--	        char * nhe = dict[k]->nohyphen + strlen(dict[k]->nohyphen) - 1;
-+	    if (*buf != '\0') dict->nohyphen = hnj_strdup(space);
-+	    if (dict->nohyphen) {
-+	        char * nhe = dict->nohyphen + strlen(dict->nohyphen) - 1;
- 	        *nhe = 0;
--	        for (nhe = nhe - 1; nhe > dict[k]->nohyphen; nhe--) {
-+	        for (nhe = nhe - 1; nhe > dict->nohyphen; nhe--) {
- 	                if (*nhe == ',') {
--	                    dict[k]->nohyphenl++;
-+	                    dict->nohyphenl++;
- 	                    *nhe = 0;
- 	                }
- 	        }
- 	    }
--	    continue;
-+	    return;
- 	  } 
- 	  j = 0;
- 	  pattern[j] = '0';
-@@ -379,7 +325,7 @@
-           } else {
-             if (*word == '.') i++;
-             /* convert UTF-8 char. positions of discretionary hyph. replacements to 8-bit */
--            if (dict[k]->utf8) {
-+            if (dict->utf8) {
-                 int pu = -1;        /* unicode character position */
-                 int ps = -1;        /* unicode start position (original replindex) */
-                 int pc = (*word == '.') ? 1: 0; /* 8-bit character position */
-@@ -403,14 +349,14 @@
- 	  printf ("word %s pattern %s, j = %d  repl: %s\n", word, pattern + i, j, repl);
- #endif
- 	  found = hnj_hash_lookup (hashtab, word);
--	  state_num = hnj_get_state (dict[k], hashtab, word);
--	  dict[k]->states[state_num].match = hnj_strdup (pattern + i);
--	  dict[k]->states[state_num].repl = repl;
--	  dict[k]->states[state_num].replindex = replindex;
-+	  state_num = hnj_get_state (dict, hashtab, word);
-+	  dict->states[state_num].match = hnj_strdup (pattern + i);
-+	  dict->states[state_num].repl = repl;
-+	  dict->states[state_num].replindex = replindex;
-           if (!replcut) {
--            dict[k]->states[state_num].replcut = (signed char) strlen(word);
-+            dict->states[state_num].replcut = (signed char) strlen(word);
-           } else {
--            dict[k]->states[state_num].replcut = replcut;
-+            dict->states[state_num].replcut = replcut;
-           }
- 
- 	  /* now, put in the prefix transitions */
-@@ -420,11 +366,81 @@
- 	      ch = word[j - 1];
- 	      word[j - 1] = '\0';
- 	      found = hnj_hash_lookup (hashtab, word);
--	      state_num = hnj_get_state (dict[k], hashtab, word);
--	      hnj_add_trans (dict[k], state_num, last_state, ch);
-+	      state_num = hnj_get_state (dict, hashtab, word);
-+	      hnj_add_trans (dict, state_num, last_state, ch);
- 	    }
--	}
-+}
-+
-+HyphenDict *
-+hnj_hyphen_load (const char *fn)
-+{
-+  HyphenDict *dict[2];
-+  HashTab *hashtab;
-+  FILE *f;
-+  char buf[MAX_CHARS];
-+  int nextlevel = 0;
-+  int i, j, k;
-+  HashEntry *e;
-+  int state_num = 0;
-+
-+  f = fopen (fn, "r");
-+  if (f == NULL)
-+    return NULL;
-+
-+// loading one or two dictionaries (separated by NEXTLEVEL keyword)
-+for (k = 0; k < 2; k++) { 
-+  hashtab = hnj_hash_new ();
-+#ifdef VERBOSE
-+  global = hashtab;
-+#endif
-+  hnj_hash_insert (hashtab, "", 0);
-+  dict[k] = hnj_malloc (sizeof(HyphenDict));
-+  dict[k]->num_states = 1;
-+  dict[k]->states = hnj_malloc (sizeof(HyphenState));
-+  dict[k]->states[0].match = NULL;
-+  dict[k]->states[0].repl = NULL;
-+  dict[k]->states[0].fallback_state = -1;
-+  dict[k]->states[0].num_trans = 0;
-+  dict[k]->states[0].trans = NULL;
-+  dict[k]->nextlevel = NULL;
-+  dict[k]->lhmin = 0;
-+  dict[k]->rhmin = 0;
-+  dict[k]->clhmin = 0;
-+  dict[k]->crhmin = 0;
-+  dict[k]->nohyphen = NULL;
-+  dict[k]->nohyphenl = 0;
-+
-+  /* read in character set info */
-+  if (k == 0) {
-+    for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0;
-+    fgets(dict[k]->cset,  sizeof(dict[k]->cset),f);
-+    for (i=0;i<MAX_NAME;i++)
-+      if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n'))
-+        dict[k]->cset[i] = 0;
-+    dict[k]->utf8 = (strcmp(dict[k]->cset, "UTF-8") == 0);
-+  } else {
-+    strcpy(dict[k]->cset, dict[0]->cset);
-+    dict[k]->utf8 = dict[0]->utf8;
-+  }
-+
-+  if (k == 0 || nextlevel) {
-+    while (fgets (buf, sizeof(buf), f) != NULL) {
-+      if (strncmp(buf, "NEXTLEVEL", 9) == 0) {
-+	nextlevel = 1;
-+	break;
-+      } else if (buf[0] != '%') hnj_hyphen_load_line(buf, dict[k], hashtab);
-     }
-+  } else if (k == 1) {
-+    /* default first level: hyphen and ASCII apostrophe */
-+    if (!dict[0]->utf8) hnj_hyphen_load_line("NOHYPHEN -,'\n", dict[k], hashtab);
-+    else hnj_hyphen_load_line("NOHYPHEN -,',\xe2\x80\x93,\xe2\x80\x99\n", dict[k], hashtab);
-+    hnj_hyphen_load_line("1-1\n", dict[k], hashtab); /* hyphen */
-+    hnj_hyphen_load_line("1'1\n", dict[k], hashtab); /* ASCII apostrophe */
-+    if (dict[0]->utf8) {
-+      hnj_hyphen_load_line("1\xe2\x80\x93" "1\n", dict[k], hashtab); /* endash */
-+      hnj_hyphen_load_line("1\xe2\x80\x99" "1\n", dict[k], hashtab); /* apostrophe */
-+    }
-+  }
- 
-   /* Could do unioning of matches here (instead of the preprocessor script).
-      If we did, the pseudocode would look something like this:
-@@ -476,7 +492,15 @@
-   state_num = 0;
- }
-   fclose(f);
--  if (k == 2) dict[0]->nextlevel = dict[1];
-+  if (nextlevel) dict[0]->nextlevel = dict[1];
-+  else {
-+    dict[1] -> nextlevel = dict[0];
-+    dict[1]->lhmin = dict[0]->lhmin;
-+    dict[1]->rhmin = dict[0]->rhmin;
-+    dict[1]->clhmin = (dict[0]->clhmin) ? dict[0]->clhmin : ((dict[0]->lhmin) ? dict[0]->lhmin : 2);
-+    dict[1]->crhmin = (dict[0]->crhmin) ? dict[0]->crhmin : ((dict[0]->rhmin) ? dict[0]->rhmin : 2);
-+    return dict[1];
-+  }
-   return dict[0];
- }
- 
-@@ -527,8 +551,13 @@
-   j = 0;
-   prep_word[j++] = '.';
- 
--  for (i = 0; i < word_size; i++)
-+  for (i = 0; i < word_size; i++) {
-+    if (word[i] <= '9' && word[i] >= '0') {
-+      prep_word[j++] = '.';
-+    } else {
-       prep_word[j++] = word[i];
-+    }
-+  }
- 
-   prep_word[j++] = '.';
-   prep_word[j] = '\0';
-@@ -670,6 +699,9 @@
-       i += hnj_ligature(word[2]);
-     }
- 
-+    // ignore numbers
-+    for (j = 0; word[j] <= '9' && word[j] >= '0'; j++) i--;
-+
-     for (j = 0; i < lhmin && word[j] != '\0'; i++) do {
-       // check length of the non-standard part
-       if (*rep && *pos && *cut && (*rep)[j]) {
-@@ -696,9 +728,13 @@
- int hnj_hyphen_rhmin(int utf8, const char *word, int word_size, char * hyphens,
- 	char *** rep, int ** pos, int ** cut, int rhmin)
- {
--    int i;
--    int j = word_size - 2;    
--    for (i = 1; i < rhmin && j > 0; j--) {
-+    int i = 1;
-+    int j;
-+
-+    // ignore numbers
-+    for (j = word_size - 1; j > 0 && word[j] <= '9' && word[j] >= '0'; j--) i--;
-+
-+    for (j = word_size - 2; i < rhmin && j > 0; j--) {
-       // check length of the non-standard part
-       if (*rep && *pos && *cut && (*rep)[j]) {
-         char * rh = strchr((*rep)[j], '=');
-@@ -756,8 +792,15 @@
-   j = 0;
-   prep_word[j++] = '.';
-   
--  for (i = 0; i < word_size; i++)
-+  for (i = 0; i < word_size; i++) {
-+    if (word[i] <= '9' && word[i] >= '0') {
-+      prep_word[j++] = '.';
-+    } else {
-       prep_word[j++] = word[i];
-+    }
-+  }
-+
-+
- 
-   prep_word[j++] = '.';
-   prep_word[j] = '\0';
-@@ -1093,8 +1136,10 @@
- 	char *hyphword, char *** rep, int ** pos, int ** cut,
- 	int lhmin, int rhmin, int clhmin, int crhmin)
- {
--  lhmin = (lhmin > 0 ? lhmin : dict->lhmin);
--  rhmin = (rhmin > 0 ? rhmin : dict->rhmin);
-+  lhmin = (lhmin > dict->lhmin) ? lhmin : dict->lhmin;
-+  rhmin = (rhmin > dict->rhmin) ? rhmin : dict->rhmin;
-+  clhmin = (clhmin > dict->clhmin) ? clhmin : dict->clhmin;
-+  crhmin = (crhmin > dict->crhmin) ? crhmin : dict->crhmin;
-   hnj_hyphen_hyph_(dict, word, word_size, hyphens, rep, pos, cut,
-     clhmin, crhmin, 1, 1);
-   hnj_hyphen_lhmin(dict->utf8, word, word_size, hyphens,
diff --git a/hyphen/makefile.mk b/hyphen/makefile.mk
index 06cfc58..1cb1a10 100644
--- a/hyphen/makefile.mk
+++ b/hyphen/makefile.mk
@@ -44,7 +44,7 @@ ADDITIONAL_FILES += makefile.mk
 PATCH_FILES= \
     hyphen-2.7.1.patch \
     hyphen-2.7.1-read-charset.patch \
-    hyphen-2.7.1-2.8.2.patch
+    hyphen-2.7.1-2.8.3.patch
 
 .IF "$(GUI)"=="UNX"
 CONFIGURE_DIR=$(BUILD_DIR)
commit 161a463b814d18ec74c3829f9ae35f635725356c
Author: László Németh <nemeth at numbertext.org>
Date:   Mon Oct 10 16:22:13 2011 +0200

    Add fixes of Hyphen 2.8.3

diff --git a/hyphen/hyphen-2.7.1-2.8.3.patch b/hyphen/hyphen-2.7.1-2.8.3.patch
new file mode 100644
index 0000000..047ce13
--- /dev/null
+++ b/hyphen/hyphen-2.7.1-2.8.3.patch
@@ -0,0 +1,399 @@
+--- misc/build/hyphen-2.7.1/hyphen.c.old	2011-10-10 15:58:33.317260138 +0200
++++ misc/build/hyphen-2.7.1/hyphen.c	2011-10-10 15:58:55.221260136 +0200
+@@ -226,115 +226,61 @@
+ }
+ 
+ #ifdef VERBOSE
+-HashTab *global;
++HashTab *global[1];
+ 
+ static char *
+-get_state_str (int state)
++get_state_str (int state, int level)
+ {
+   int i;
+   HashEntry *e;
+ 
+   for (i = 0; i < HASH_SIZE; i++)
+-    for (e = global->entries[i]; e; e = e->next)
++    for (e = global[level]->entries[i]; e; e = e->next)
+       if (e->val == state)
+ 	return e->key;
+   return NULL;
+ }
+ #endif
+ 
+-HyphenDict *
+-hnj_hyphen_load (const char *fn)
+-{
+-  HyphenDict *dict[2];
+-  HashTab *hashtab;
+-  FILE *f;
+-  char buf[MAX_CHARS];
++void hnj_hyphen_load_line(char * buf, HyphenDict * dict, HashTab * hashtab) {
++  int i, j;
+   char word[MAX_CHARS];
+   char pattern[MAX_CHARS];
+   char * repl;
+   signed char replindex;
+   signed char replcut;
+-  int state_num = 0, last_state;
+-  int i, j, k;
++  int state_num = 0;
++  int last_state;
+   char ch;
+   int found;
+-  HashEntry *e;
+-  int nextlevel = 0;
+-
+-  f = fopen (fn, "r");
+-  if (f == NULL)
+-    return NULL;
+-
+-// loading one or two dictionaries (separated by NEXTLEVEL keyword)
+-for (k = 0; k == 0 || (k == 1 && nextlevel); k++) { 
+-  hashtab = hnj_hash_new ();
+-#ifdef VERBOSE
+-  global = hashtab;
+-#endif
+-  hnj_hash_insert (hashtab, "", 0);
+-  dict[k] = hnj_malloc (sizeof(HyphenDict));
+-  dict[k]->num_states = 1;
+-  dict[k]->states = hnj_malloc (sizeof(HyphenState));
+-  dict[k]->states[0].match = NULL;
+-  dict[k]->states[0].repl = NULL;
+-  dict[k]->states[0].fallback_state = -1;
+-  dict[k]->states[0].num_trans = 0;
+-  dict[k]->states[0].trans = NULL;
+-  dict[k]->nextlevel = NULL;
+-  dict[k]->lhmin = 0;
+-  dict[k]->rhmin = 0;
+-  dict[k]->clhmin = 0;
+-  dict[k]->crhmin = 0;
+-  dict[k]->nohyphen = NULL;
+-  dict[k]->nohyphenl = 0;
+-
+-  /* read in character set info */
+-  if (k == 0) {
+-    for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0;
+-    fgets(dict[k]->cset,  sizeof(dict[k]->cset),f);
+-    for (i=0;i<MAX_NAME;i++)
+-      if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n'))
+-        dict[k]->cset[i] = 0;
+-    dict[k]->utf8 = (strcmp(dict[k]->cset, "UTF-8") == 0);
+-  } else {
+-    strcpy(dict[k]->cset, dict[0]->cset);
+-    dict[k]->utf8 = dict[0]->utf8;
+-  }
+ 
+-  while (fgets (buf, sizeof(buf), f) != NULL)
+-    {
+-      if (buf[0] != '%')
+-	{
+-	  if (strncmp(buf, "NEXTLEVEL", 9) == 0) {
+-	    nextlevel = 1;
+-	    break;
+-	  } else if (strncmp(buf, "LEFTHYPHENMIN", 13) == 0) {
+-	    dict[k]->lhmin = atoi(buf + 13);
+-	    continue;
++	  if (strncmp(buf, "LEFTHYPHENMIN", 13) == 0) {
++	    dict->lhmin = atoi(buf + 13);
++	    return;
+ 	  } else if (strncmp(buf, "RIGHTHYPHENMIN", 14) == 0) {
+-	    dict[k]->rhmin = atoi(buf + 14);
+-	    continue;
++	    dict->rhmin = atoi(buf + 14);
++	    return;
+ 	  } else if (strncmp(buf, "COMPOUNDLEFTHYPHENMIN", 21) == 0) {
+-	    dict[k]->clhmin = atoi(buf + 21);
+-	    continue;
++	    dict->clhmin = atoi(buf + 21);
++	    return;
+ 	  } else if (strncmp(buf, "COMPOUNDRIGHTHYPHENMIN", 22) == 0) {
+-	    dict[k]->crhmin = atoi(buf + 22);
+-	    continue;
++	    dict->crhmin = atoi(buf + 22);
++	    return;
+ 	  } else if (strncmp(buf, "NOHYPHEN", 8) == 0) {
+ 	    char * space = buf + 8;
+ 	    while (*space != '\0' && (*space == ' ' || *space == '\t')) space++;
+-	    if (*buf != '\0') dict[k]->nohyphen = hnj_strdup(space);
+-	    if (dict[k]->nohyphen) {
+-	        char * nhe = dict[k]->nohyphen + strlen(dict[k]->nohyphen) - 1;
++	    if (*buf != '\0') dict->nohyphen = hnj_strdup(space);
++	    if (dict->nohyphen) {
++	        char * nhe = dict->nohyphen + strlen(dict->nohyphen) - 1;
+ 	        *nhe = 0;
+-	        for (nhe = nhe - 1; nhe > dict[k]->nohyphen; nhe--) {
++	        for (nhe = nhe - 1; nhe > dict->nohyphen; nhe--) {
+ 	                if (*nhe == ',') {
+-	                    dict[k]->nohyphenl++;
++	                    dict->nohyphenl++;
+ 	                    *nhe = 0;
+ 	                }
+ 	        }
+ 	    }
+-	    continue;
++	    return;
+ 	  } 
+ 	  j = 0;
+ 	  pattern[j] = '0';
+@@ -379,7 +325,7 @@
+           } else {
+             if (*word == '.') i++;
+             /* convert UTF-8 char. positions of discretionary hyph. replacements to 8-bit */
+-            if (dict[k]->utf8) {
++            if (dict->utf8) {
+                 int pu = -1;        /* unicode character position */
+                 int ps = -1;        /* unicode start position (original replindex) */
+                 int pc = (*word == '.') ? 1: 0; /* 8-bit character position */
+@@ -403,14 +349,14 @@
+ 	  printf ("word %s pattern %s, j = %d  repl: %s\n", word, pattern + i, j, repl);
+ #endif
+ 	  found = hnj_hash_lookup (hashtab, word);
+-	  state_num = hnj_get_state (dict[k], hashtab, word);
+-	  dict[k]->states[state_num].match = hnj_strdup (pattern + i);
+-	  dict[k]->states[state_num].repl = repl;
+-	  dict[k]->states[state_num].replindex = replindex;
++	  state_num = hnj_get_state (dict, hashtab, word);
++	  dict->states[state_num].match = hnj_strdup (pattern + i);
++	  dict->states[state_num].repl = repl;
++	  dict->states[state_num].replindex = replindex;
+           if (!replcut) {
+-            dict[k]->states[state_num].replcut = (signed char) strlen(word);
++            dict->states[state_num].replcut = (signed char) strlen(word);
+           } else {
+-            dict[k]->states[state_num].replcut = replcut;
++            dict->states[state_num].replcut = replcut;
+           }
+ 
+ 	  /* now, put in the prefix transitions */
+@@ -420,11 +366,82 @@
+ 	      ch = word[j - 1];
+ 	      word[j - 1] = '\0';
+ 	      found = hnj_hash_lookup (hashtab, word);
+-	      state_num = hnj_get_state (dict[k], hashtab, word);
+-	      hnj_add_trans (dict[k], state_num, last_state, ch);
++	      state_num = hnj_get_state (dict, hashtab, word);
++	      hnj_add_trans (dict, state_num, last_state, ch);
+ 	    }
+-	}
++}
++
++HyphenDict *
++hnj_hyphen_load (const char *fn)
++{
++  HyphenDict *dict[2];
++  HashTab *hashtab;
++  FILE *f;
++  char buf[MAX_CHARS];
++  int nextlevel = 0;
++  int i, j, k;
++  HashEntry *e;
++  int state_num = 0;
++
++  f = fopen (fn, "r");
++  if (f == NULL)
++    return NULL;
++
++// loading one or two dictionaries (separated by NEXTLEVEL keyword)
++for (k = 0; k < 2; k++) { 
++  hashtab = hnj_hash_new ();
++#ifdef VERBOSE
++  global[k] = hashtab;
++#endif
++  hnj_hash_insert (hashtab, "", 0);
++  dict[k] = hnj_malloc (sizeof(HyphenDict));
++  dict[k]->num_states = 1;
++  dict[k]->states = hnj_malloc (sizeof(HyphenState));
++  dict[k]->states[0].match = NULL;
++  dict[k]->states[0].repl = NULL;
++  dict[k]->states[0].fallback_state = -1;
++  dict[k]->states[0].num_trans = 0;
++  dict[k]->states[0].trans = NULL;
++  dict[k]->nextlevel = NULL;
++  dict[k]->lhmin = 0;
++  dict[k]->rhmin = 0;
++  dict[k]->clhmin = 0;
++  dict[k]->crhmin = 0;
++  dict[k]->nohyphen = NULL;
++  dict[k]->nohyphenl = 0;
++
++  /* read in character set info */
++  if (k == 0) {
++    for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0;
++    fgets(dict[k]->cset,  sizeof(dict[k]->cset),f);
++    for (i=0;i<MAX_NAME;i++)
++      if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n'))
++        dict[k]->cset[i] = 0;
++    dict[k]->utf8 = (strcmp(dict[k]->cset, "UTF-8") == 0);
++  } else {
++    strcpy(dict[k]->cset, dict[0]->cset);
++    dict[k]->utf8 = dict[0]->utf8;
++  }
++
++  if (k == 0 || nextlevel) {
++    while (fgets (buf, sizeof(buf), f) != NULL) {
++      if (strncmp(buf, "NEXTLEVEL", 9) == 0) {
++	nextlevel = 1;
++	break;
++      } else if (buf[0] != '%') hnj_hyphen_load_line(buf, dict[k], hashtab);
++    }
++  } else if (k == 1) {
++    /* default first level: hyphen and ASCII apostrophe */
++    if (!dict[0]->utf8) hnj_hyphen_load_line("NOHYPHEN '\n", dict[k], hashtab);
++    else hnj_hyphen_load_line("NOHYPHEN ',\xe2\x80\x93,\xe2\x80\x99\n", dict[k], hashtab);
++    strcpy(buf, "1-1/=,1,1\n"); // buf rewritten by hnj_hyphen_load here
++    hnj_hyphen_load_line(buf, dict[k], hashtab); /* remove hyphen */
++    hnj_hyphen_load_line("1'1\n", dict[k], hashtab); /* ASCII apostrophe */
++    if (dict[0]->utf8) {
++      hnj_hyphen_load_line("1\xe2\x80\x93" "1\n", dict[k], hashtab); /* endash */
++      hnj_hyphen_load_line("1\xe2\x80\x99" "1\n", dict[k], hashtab); /* apostrophe */
+     }
++  }
+ 
+   /* Could do unioning of matches here (instead of the preprocessor script).
+      If we did, the pseudocode would look something like this:
+@@ -476,7 +493,20 @@
+   state_num = 0;
+ }
+   fclose(f);
+-  if (k == 2) dict[0]->nextlevel = dict[1];
++  if (nextlevel) dict[0]->nextlevel = dict[1];
++  else {
++    dict[1] -> nextlevel = dict[0];
++    dict[1]->lhmin = dict[0]->lhmin;
++    dict[1]->rhmin = dict[0]->rhmin;
++    dict[1]->clhmin = (dict[0]->clhmin) ? dict[0]->clhmin : ((dict[0]->lhmin) ? dict[0]->lhmin : 3);
++    dict[1]->crhmin = (dict[0]->crhmin) ? dict[0]->crhmin : ((dict[0]->rhmin) ? dict[0]->rhmin : 3);
++#ifdef VERBOSE
++    HashTab *r = global[0];
++    global[0] = global[1];
++    global[1] = r;
++#endif
++    return dict[1];
++  }
+   return dict[0];
+ }
+ 
+@@ -527,8 +557,13 @@
+   j = 0;
+   prep_word[j++] = '.';
+ 
+-  for (i = 0; i < word_size; i++)
++  for (i = 0; i < word_size; i++) {
++    if (word[i] <= '9' && word[i] >= '0') {
++      prep_word[j++] = '.';
++    } else {
+       prep_word[j++] = word[i];
++    }
++  }
+ 
+   prep_word[j++] = '.';
+   prep_word[j] = '\0';
+@@ -557,7 +592,7 @@
+ 
+ #ifdef VERBOSE
+ 	  char *state_str;
+-	  state_str = get_state_str (state);
++	  state_str = get_state_str (state, 0);
+ 
+ 	  for (k = 0; k < i - strlen (state_str); k++)
+ 	    putchar (' ');
+@@ -670,6 +705,9 @@
+       i += hnj_ligature(word[2]);
+     }
+ 
++    // ignore numbers
++    for (j = 0; word[j] <= '9' && word[j] >= '0'; j++) i--;
++
+     for (j = 0; i < lhmin && word[j] != '\0'; i++) do {
+       // check length of the non-standard part
+       if (*rep && *pos && *cut && (*rep)[j]) {
+@@ -696,9 +734,13 @@
+ int hnj_hyphen_rhmin(int utf8, const char *word, int word_size, char * hyphens,
+ 	char *** rep, int ** pos, int ** cut, int rhmin)
+ {
+-    int i;
+-    int j = word_size - 2;    
+-    for (i = 1; i < rhmin && j > 0; j--) {
++    int i = 1;
++    int j;
++
++    // ignore numbers
++    for (j = word_size - 1; j > 0 && word[j] <= '9' && word[j] >= '0'; j--) i--;
++
++    for (j = word_size - 2; i < rhmin && j > 0; j--) {
+       // check length of the non-standard part
+       if (*rep && *pos && *cut && (*rep)[j]) {
+         char * rh = strchr((*rep)[j], '=');
+@@ -756,8 +798,15 @@
+   j = 0;
+   prep_word[j++] = '.';
+   
+-  for (i = 0; i < word_size; i++)
++  for (i = 0; i < word_size; i++) {
++    if (word[i] <= '9' && word[i] >= '0') {
++      prep_word[j++] = '.';
++    } else {
+       prep_word[j++] = word[i];
++    }
++  }
++
++
+ 
+   prep_word[j++] = '.';
+   prep_word[j] = '\0';
+@@ -786,7 +835,7 @@
+ 
+ #ifdef VERBOSE
+ 	  char *state_str;
+-	  state_str = get_state_str (state);
++	  state_str = get_state_str (state, 1);
+ 
+ 	  for (k = 0; k < i - strlen (state_str); k++)
+ 	    putchar (' ');
+@@ -1033,6 +1082,9 @@
+     }
+   }
+   hyphens[j + 1] = '\0';
++#ifdef VERBOSE
++  printf ("nums: %s\n", hyphens);
++#endif
+   return 0;
+ }
+ 
+@@ -1074,8 +1126,8 @@
+     for (nhi = 0; nhi <= dict->nohyphenl; nhi++) {
+         char * nhy = (char *) strstr(word, nh);
+         while (nhy) {
+-            hyphens[nhy - word + strlen(nh) - 1] = 0;
+-            if (nhy - word  - 1 >= 0) hyphens[nhy - word - 1] = 0;
++            hyphens[nhy - word + strlen(nh) - 1] = '0';
++            if (nhy - word  - 1 >= 0) hyphens[nhy - word - 1] = '0';
+             nhy = (char *) strstr(nhy + 1, nh);
+         }
+         nh = nh + strlen(nh) + 1;
+@@ -1084,6 +1136,9 @@
+ 
+   if (hyphword) hnj_hyphen_hyphword(word, word_size, hyphens, hyphword, rep, pos, cut);
+   if (dict->utf8) return hnj_hyphen_norm(word, word_size, hyphens, rep, pos, cut);
++#ifdef VERBOSE
++  printf ("nums: %s\n", hyphens);
++#endif
+   return 0;
+ }
+ 
+@@ -1093,8 +1148,10 @@
+ 	char *hyphword, char *** rep, int ** pos, int ** cut,
+ 	int lhmin, int rhmin, int clhmin, int crhmin)
+ {
+-  lhmin = (lhmin > 0 ? lhmin : dict->lhmin);
+-  rhmin = (rhmin > 0 ? rhmin : dict->rhmin);
++  lhmin = (lhmin > dict->lhmin) ? lhmin : dict->lhmin;
++  rhmin = (rhmin > dict->rhmin) ? rhmin : dict->rhmin;
++  clhmin = (clhmin > dict->clhmin) ? clhmin : dict->clhmin;
++  crhmin = (crhmin > dict->crhmin) ? crhmin : dict->crhmin;
+   hnj_hyphen_hyph_(dict, word, word_size, hyphens, rep, pos, cut,
+     clhmin, crhmin, 1, 1);
+   hnj_hyphen_lhmin(dict->utf8, word, word_size, hyphens,


More information about the Libreoffice-commits mailing list