1 files changed, 515 insertions, 0 deletions
diff --git a/troff/libhnj/hyphen.c b/troff/libhnj/hyphen.c
new file mode 100644
index 0000000000000..3d74b3becf8ce
--- /dev/null
+++ b/troff/libhnj/hyphen.c
@@ -0,0 +1,515 @@
+/* LibHnj is dual licensed under LGPL and MPL. Boilerplate for both
+ * licenses follows.
+ */
+
+/* LibHnj - a library for high quality hyphenation and justification
+ * Copyright (C) 1998 Raph Levien, 
+ * 	     (C) 2001 ALTLinux, Moscow (http://www.alt-linux.org), 
+ *           (C) 2001 Peter Novodvorsky (nidd@cs.msu.su)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the 
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 
+ * Boston, MA  02111-1307  USA.
+*/
+
+/*
+ * The contents of this file are subject to the Mozilla Public License
+ * Version 1.0 (the "MPL"); you may not use this file except in
+ * compliance with the MPL.  You may obtain a copy of the MPL at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the MPL is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the MPL
+ * for the specific language governing rights and limitations under the
+ * MPL.
+ *
+ */
+
+/*
+ * Changes by Gunnar Ritter, Freiburg i. Br., Germany, August 2005.
+ *
+ * Sccsid @(#)hyphen.c	1.5 (gritter) 4/19/06
+ */
+
+#include <stdlib.h> /* for NULL, malloc */
+#include <stdio.h>  /* for fprintf */
+#include <string.h> /* for strdup */
+
+#ifdef UNX
+#include <unistd.h> /* for exit */
+#endif
+
+#define noVERBOSE
+
+#include "hyphen.h"
+#include "hnjalloc.h"
+
+static char *
+hnj_strdup (const char *s, HyphenDict *hp)
+{
+  char *new;
+  int l;
+
+  l = strlen (s);
+  new = hnj_malloc (l + 1, hp);
+  memcpy (new, s, l);
+  new[l] = 0;
+  return new;
+}
+
+/* a little bit of a hash table implementation. This simply maps strings
+   to state numbers */
+
+typedef struct _HashTab HashTab;
+typedef struct _HashEntry HashEntry;
+
+/* A cheap, but effective, hack. */
+#define HASH_SIZE 31627
+
+struct _HashTab {
+  HashEntry *entries[HASH_SIZE];
+};
+
+struct _HashEntry {
+  HashEntry *next;
+  char *key;
+  int val;
+};
+
+/* a char* hash function from ASU - adapted from Gtk+ */
+static unsigned int
+hnj_string_hash (const char *s)
+{
+  const char *p;
+  unsigned int h=0, g;
+
+  for(p = s; *p != '\0'; p += 1) {
+    h = ( h << 4 ) + *p;
+    if ( ( g = h & 0xf0000000 ) ) {
+      h = h ^ (g >> 24);
+      h = h ^ g;
+    }
+  }
+  return h /* % M */;
+}
+
+static HashTab *
+hnj_hash_new (HyphenDict *hp)
+{
+  HashTab *hashtab;
+  int i;
+
+  hashtab = hnj_malloc (sizeof(HashTab), hp);
+  for (i = 0; i < HASH_SIZE; i++)
+    hashtab->entries[i] = NULL;
+
+  return hashtab;
+}
+
+static void
+hnj_hash_free (HashTab *hashtab, HyphenDict *hp)
+{
+  int i;
+  HashEntry *e, *next;
+
+  for (i = 0; i < HASH_SIZE; i++)
+    for (e = hashtab->entries[i]; e; e = next)
+      {
+	next = e->next;
+	hnj_free (e->key, hp);
+	hnj_free (e, hp);
+      }
+
+  hnj_free (hashtab, hp);
+}
+
+/* assumes that key is not already present! */
+static void
+hnj_hash_insert (HashTab *hashtab, const char *key, int val, HyphenDict *hp)
+{
+  int i;
+  HashEntry *e;
+
+  i = hnj_string_hash (key) % HASH_SIZE;
+  e = hnj_malloc (sizeof(HashEntry), hp);
+  e->next = hashtab->entries[i];
+  e->key = hnj_strdup (key, hp);
+  e->val = val;
+  hashtab->entries[i] = e;
+}
+
+/* return val if found, otherwise -1 */
+static int
+hnj_hash_lookup (HashTab *hashtab, const char *key)
+{
+  int i;
+  HashEntry *e;
+
+  i = hnj_string_hash (key) % HASH_SIZE;
+  for (e = hashtab->entries[i]; e; e = e->next)
+    if (!strcmp (key, e->key))
+      return e->val;
+  return -1;
+}
+
+/* Get the state number, allocating a new state if necessary. */
+static int
+hnj_get_state (HyphenDict *dict, HashTab *hashtab, const char *string)
+{
+  int state_num;
+
+  state_num = hnj_hash_lookup (hashtab, string);
+
+  if (state_num >= 0)
+    return state_num;
+
+  hnj_hash_insert (hashtab, string, dict->num_states, dict);
+  /* predicate is true if dict->num_states is a power of two */
+  if (dict->num_states >= dict->alc_states)
+    {
+      dict->alc_states *= 2;
+      dict->states = hnj_realloc (dict->states,
+				  dict->alc_states *
+				  sizeof(HyphenState), dict);
+    }
+  dict->states[dict->num_states].match = NULL;
+  dict->states[dict->num_states].fallback_state = -1;
+  dict->states[dict->num_states].num_trans = 0;
+  dict->states[dict->num_states].trans = NULL;
+  return dict->num_states++;
+}
+
+/* add a transition from state1 to state2 through ch - assumes that the
+   transition does not already exist */
+static void
+hnj_add_trans (HyphenDict *dict, int state1, int state2, char ch)
+{
+  int num_trans;
+
+  num_trans = dict->states[state1].num_trans;
+  if (num_trans == 0)
+    {
+      dict->states[state1].alc_trans = 64;
+      dict->states[state1].trans = hnj_realloc (NULL,
+	      dict->states[state1].alc_trans * sizeof(HyphenTrans), dict);
+    }
+  else if (num_trans >= dict->states[state1].alc_trans)
+    {
+      dict->states[state1].alc_trans *= 2;
+      dict->states[state1].trans = hnj_realloc (dict->states[state1].trans,
+		      dict->states[state1].alc_trans *
+				sizeof(HyphenTrans), dict);
+    }
+  dict->states[state1].trans[num_trans].ch = ch;
+  dict->states[state1].trans[num_trans].new_state = state2;
+  dict->states[state1].num_trans++;
+}
+
+#ifdef VERBOSE
+HashTab *global;
+
+static char *
+get_state_str (int state)
+{
+  int i;
+  HashEntry *e;
+
+  for (i = 0; i < HASH_SIZE; i++)
+    for (e = global->entries[i]; e; e = e->next)
+      if (e->val == state)
+	return e->key;
+  return NULL;
+}
+#endif
+
+HyphenDict *
+hnj_hyphen_load (const char *fn)
+{
+  HyphenDict *dict;
+  HashTab *hashtab;
+  FILE *f;
+  char buf[80];
+  char word[80];
+  char pattern[80];
+  int state_num, last_state;
+  int i, j;
+  char ch;
+  int found;
+  HashEntry *e;
+
+  f = fopen (fn, "r");
+  if (f == NULL)
+    return NULL;
+
+  hashtab = hnj_hash_new (NULL);
+#ifdef VERBOSE
+  global = hashtab;
+#endif
+  hnj_hash_insert (hashtab, "", 0, NULL);
+
+  dict = hnj_malloc (sizeof(HyphenDict), NULL);
+  memset(dict, 0, sizeof *dict);
+  dict->num_states = 1;
+  dict->alc_states = 12288;
+  dict->states = hnj_realloc (NULL, dict->alc_states*sizeof(HyphenState), dict);
+  dict->states[0].match = NULL;
+  dict->states[0].fallback_state = -1;
+  dict->states[0].num_trans = 0;
+  dict->states[0].trans = NULL;
+
+  /* read in character set info */
+  for (i=0;i<MAX_NAME;i++) dict->cset[i]= 0;
+  fgets(dict->cset,  sizeof(dict->cset),f);
+  for (i=0;i<MAX_NAME;i++)
+    if ((dict->cset[i] == '\r') || (dict->cset[i] == '\n'))
+        dict->cset[i] = 0;
+
+  while (fgets (buf, sizeof(buf), f) != NULL)
+    {
+      if (buf[0] != '%')
+	{
+	  j = 0;
+	  pattern[j] = '0';
+	  for (i = 0; ((buf[i] > ' ') || (buf[i] < 0)); i++)
+	    {
+	      if (buf[i] >= '0' && buf[i] <= '9')
+		pattern[j] = buf[i];
+	      else
+		{
+		  word[j] = buf[i];
+		  pattern[++j] = '0';
+		}
+	    }
+	  word[j] = '\0';
+	  pattern[j + 1] = '\0';
+
+	  /* Optimize away leading zeroes */
+	  for (i = 0; pattern[i] == '0'; i++);
+
+#ifdef VERBOSE
+	  printf ("word %s pattern %s, j = %d\n", word, pattern + i, j);
+#endif
+	  found = hnj_hash_lookup (hashtab, word);
+	  state_num = hnj_get_state (dict, hashtab, word);
+	  dict->states[state_num].match = hnj_strdup (pattern + i, dict);
+
+	  /* now, put in the prefix transitions */
+	  for (; found < 0 ;j--)
+	    {
+	      last_state = state_num;
+	      ch = word[j - 1];
+	      word[j - 1] = '\0';
+	      found = hnj_hash_lookup (hashtab, word);
+	      state_num = hnj_get_state (dict, hashtab, word);
+	      hnj_add_trans (dict, state_num, last_state, ch);
+	    }
+	}
+    }
+
+  /* Could do unioning of matches here (instead of the preprocessor script).
+     If we did, the pseudocode would look something like this:
+
+     foreach state in the hash table
+        foreach i = [1..length(state) - 1]
+           state to check is substr (state, i)
+           look it up
+           if found, and if there is a match, union the match in.
+
+     It's also possible to avoid the quadratic blowup by doing the
+     search in order of increasing state string sizes - then you
+     can break the loop after finding the first match.
+
+     This step should be optional in any case - if there is a
+     preprocessed rule table, it's always faster to use that.
+
+*/
+
+  /* put in the fallback states */
+  for (i = 0; i < HASH_SIZE; i++)
+    for (e = hashtab->entries[i]; e; e = e->next)
+      {
+	state_num = -1;
+	for (j = 1; e->key[j-1]; j++)
+	  {
+	    state_num = hnj_hash_lookup (hashtab, e->key + j);
+	    if (state_num >= 0)
+	      break;
+	  }
+        /* KBH: FIXME state 0 fallback_state should always be -1? */
+	if (e->val && state_num >= 0) 
+	  dict->states[e->val].fallback_state = state_num;
+      }
+#ifdef VERBOSE
+  for (i = 0; i < HASH_SIZE; i++)
+    for (e = hashtab->entries[i]; e; e = e->next)
+      {
+	printf ("%d string %s state %d, fallback=%d\n", i, e->key, e->val,
+		dict->states[e->val].fallback_state);
+	for (j = 0; j < dict->states[e->val].num_trans; j++)
+	  printf (" %c->%d\n", dict->states[e->val].trans[j].ch,
+		  dict->states[e->val].trans[j].new_state);
+      }
+#endif
+
+#ifndef VERBOSE
+  hnj_hash_free (hashtab, dict);
+#endif
+
+  return dict;
+}
+
+void hnj_hyphen_free (HyphenDict *dict)
+{
+  int state_num;
+  HyphenState *hstate;
+
+  for (state_num = 0; state_num < dict->num_states; state_num++)
+    {
+      hstate = &dict->states[state_num];
+      if (hstate->match)
+	hnj_free (hstate->match, dict);
+      if (hstate->trans)
+	hnj_free (hstate->trans, dict);
+    }
+
+  hnj_free (dict->states, dict);
+
+  hnj_free (dict, NULL);
+}
+
+#define MAX_WORD 256
+
+int hnj_hyphen_hyphenate (HyphenDict *dict,
+			   const char *word, int word_size,
+			   char *hyphens)
+{
+  char prep_word_buf[MAX_WORD];
+  char *prep_word;
+  int i, j, k;
+  int state;
+  char ch;
+  HyphenState *hstate;
+  char *match;
+  int offset;
+
+  if (word_size + 3 < MAX_WORD)
+    prep_word = prep_word_buf;
+  else
+    prep_word = hnj_malloc (word_size + 3, dict);
+
+  j = 0;
+  prep_word[j++] = '.';
+  
+  for (i = 0; i < word_size; i++)
+      prep_word[j++] = word[i];
+      
+  for (i = 0; i < j; i++)                                                       
+    hyphens[i] = '0';    
+  
+  prep_word[j++] = '.';
+
+  prep_word[j] = '\0';
+#ifdef VERBOSE
+  printf ("prep_word = %s\n", prep_word);
+#endif
+
+  /* now, run the finite state machine */
+  state = 0;
+  for (i = 0; i < j; i++)
+    {
+      ch = prep_word[i];
+      for (;;)
+	{
+
+	  if (state == -1) {
+            /* return 1;
+	     * KBH: FIXME shouldn't this be as follows?
+	     */
+            state = 0;
+            goto try_next_letter;
+          }          
+
+#ifdef VERBOSE
+	  char *state_str;
+	  state_str = get_state_str (state);
+
+	  for (k = 0; k < i - strlen (state_str); k++)
+	    putchar (' ');
+	  printf ("%s", state_str);
+#endif
+
+	  hstate = &dict->states[state];
+	  for (k = 0; k < hstate->num_trans; k++)
+	    if (hstate->trans[k].ch == ch)
+	      {
+		state = hstate->trans[k].new_state;
+		goto found_state;
+	      }
+	  state = hstate->fallback_state;
+#ifdef VERBOSE
+	  printf (" falling back, fallback_state %d\n", state);
+#endif
+	}
+    found_state:
+#ifdef VERBOSE
+      printf ("found state %d\n",state);
+#endif
+      /* Additional optimization is possible here - especially,
+	 elimination of trailing zeroes from the match. Leading zeroes
+	 have already been optimized. */
+      match = dict->states[state].match;
+      if (match)
+	{
+	  offset = i + 1 - strlen (match);
+#ifdef VERBOSE
+	  for (k = 0; k < offset; k++)
+	    putchar (' ');
+	  printf ("%s\n", match);
+#endif
+	  /* This is a linear search because I tried a binary search and
+	     found it to be just a teeny bit slower. */
+	  for (k = 0; match[k]; k++)
+	    if (offset + k < word_size && hyphens[offset + k] < match[k])
+	      hyphens[offset + k] = match[k];
+	}
+
+      /* KBH: we need this to make sure we keep looking in a word
+       * for patterns even if the current character is not known in state 0
+       * since patterns for hyphenation may occur anywhere in the word
+       */
+      try_next_letter: ;
+
+    }
+#ifdef VERBOSE
+  for (i = 0; i < j; i++)
+    putchar (hyphens[i]);
+  putchar ('\n');
+#endif
+
+  for (i = 0; i < j - 4; i++)
+#if 0
+    if (hyphens[i + 1] & 1)
+      hyphens[i] = '-';
+#else
+    hyphens[i] = hyphens[i + 1];
+#endif
+  hyphens[0] = '0';
+  for (; i < word_size; i++)
+    hyphens[i] = '0';
+  hyphens[word_size] = '\0';
+
+  if (prep_word != prep_word_buf)
+    hnj_free (prep_word, dict);
+  return 0;    
+}