/* hyphen_show: listing hyphenations                                       */
/*                                                                         */
/* Copyright (C) 1997 G. Lamprecht, W. Lotz, R. Weibezahn; IWD, Uni Bremen */

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifndef AIX
  #include <sys/fcntl.h>
  #include <unistd.h>
#else
  #include <fcntl.h>
  #include <unistd.h>
#endif
/* Size in bytes of the static read buffer inside gchar(). */
#define BUFLEN_E 32768
/* Descriptor of the input file; assigned from open() in main and read by
   gchar(). */
int  filed1;
/* a_file: output stream the word list is written to (opened in main).
   ver_dat: optional word file read in main to fill hck[]. */
FILE *a_file,*ver_dat;
/* Incremented by gchar() for every byte it hands out. */
int  anzz=0;

/* Old-style declaration (no parameter list) of the byte reader defined
   below. */
extern int  gchar();


/* gchar: deliver the next byte of the input file through *byt.
 *
 * The file behind the descriptor filed1 is read in chunks of BUFLEN_E
 * bytes into a static buffer; a new chunk is fetched whenever the buffer
 * has been used up.  Every delivered byte increments the counter anzz.
 *
 * Returns 1 when a byte was stored in *byt, 0 when read() reported end of
 * file, and the negative read() result (after printing a message) when
 * reading failed.
 */
int gchar(byt) char   *byt;
{
  static char inbuf[BUFLEN_E];
  static int  avail = 0;
  static char *next;

  if (avail == 0)
  {
    avail = read(filed1, inbuf, BUFLEN_E);
    next  = inbuf;
  }

  if (avail < 0)
  {
    printf("\nerror in reading, return code=%d\n", avail);
    return avail;
  }
  if (avail == 0)
    return 0;

  *byt = *next++;
  avail--;
  anzz++;
  return 1;
}

/* Truth values are represented as the characters 't' and 'f' throughout
   this file. */
char true  = 't',
     false = 'f';
/* Tested by the read loop in main; no code visible in this file changes
   it. */
char e_endfile = 'f';

/* d_*: last amount read by eingabe() for the right (d_r), w, x, down (d_v),
   y and z commands.
   *_grenze: where it is used, a movement above this value emits a blank.
   *_grenze_zeile: a movement above this value flushes the current line.
   mitte: column eop() uses to position the listed word fragments. */
long d_w, w_grenze = 50000, w_grenze_zeile = 5000000,
     d_x, x_grenze = 50000, x_grenze_zeile =  700000,
     d_y, y_grenze = 10000, y_grenze_zeile =  100000,
     d_r, r_grenze = 50000, r_grenze_zeile = 1000000,
     d_v, v_grenze = 10000, v_grenze_zeile =  600000,
     d_z, z_grenze = 10000, z_grenze_zeile =  700000,
     mitte = 30;
/* Capacities; the values match the dimensions of hck[], neu[] and of the
   250-byte line buffers declared below. */
int  laenge_hck = 20000,
     laenge_neu =  5000,
     laenge_puf =   250;
/* Names of the two font codings that special_char() and zugelassen()
   distinguish. */
char font_code1[]="OT1",
     font_code2[]="T1";

/* s_p: index into seite[]/seitep[] (next free line while collecting, current
   line inside eop); s_max: last line index of the page being evaluated.
   hck_p/hck_max: fill index / last index of hck[]; neu_max: last index used
   in neu[]; s_puffer_p: fill position in s_puffer; np: number of page
   markers printed on the current output line (see test_neue_seite). */
int  s_p,s_max, hck_p,hck_max,neu_max,s_puffer_p,np;
/* pos1: output column computed in eop; gesamt: hyphenations counted;
   anzgef: words found by vergl_test() in hck[] or neu[]. */
long pos1,gesamt,anzgef;

/* vorschub: set by bop(), cleared in eop() after an empty line has been
   printed.  enthalten: remembers whether the previously stored line
   contained a hyphen near its end (see a_outimage). */
char vorschub,enthalten;
/* File names (input, word file, output) with pointers to them, and the
   selected font coding. */
char hauptdatei[80],verdatei[80],aus_dat[80],
    *hauptdateip,  *verdateip,  *aus_datp,
    *font_codep, font_code[10];
/* seite: stored text lines of the current page, seitep: pointers to them;
   s_puffer: line currently being assembled; hh, hh1: scratch buffers used
   by eop() and text_clear(). */
char seite[5000][250],s_puffer[250],hh[80],hh1[80],
    *seitep[5000],*ttaltp;
/* Result buffers of wortanfang() and wortende(). */
char wortanf[250],wortend[250];
/* Words read from the optional word file in main. */
char hck[20000][80];
/* Words recorded by vergl_test() during this run. */
char neu[5000][80];
/* k: current command byte in main; zz: byte buffer filled by e_inbyte(). */
unsigned char k,zz;

/* fehler: report that the input ended prematurely and terminate the
   program (exit status 0). */
void fehler()
{
  fputs("\nError: eof before postamble\n", stdout);
  exit(0);
}

/* e_inbyte: return the next input byte.
   The byte is fetched with gchar() into the global zz; when gchar() does not
   deliver a byte, fehler() is called, which ends the program. */
unsigned  char e_inbyte()
  {
   if (gchar(&zz) != 1)
     fehler();
   return zz;
  }

/* Start of the conversion routines */

/* a_outimage: finish the line collected in s_puffer.
 *
 * Nothing happens while the buffer holds at most two characters.  Otherwise
 * the line is stored as the next entry of seite[] when it has a '-' as its
 * last or second-to-last character, or when the previously stored line had
 * one (flag enthalten); the flag is then updated for the next call.  In
 * either case the buffer is emptied.
 *
 * Two guards were added: a line is only stored while seite[] still has a
 * free row, and at most laenge_puf-1 characters are copied so that the
 * stored row always keeps its terminating NUL.
 */
void a_outimage()
  { char b;
    int  len;
    int  rows = (int) (sizeof seite / sizeof seite[0]);

    if (s_puffer_p > 2)
    { if ((s_puffer[s_puffer_p-2] == '-') || (s_puffer[s_puffer_p-1] == '-'))
      { b = true; }
      else
      { b = false; }

      if ((b == true) || (enthalten == true))
      { if (s_p < rows)
        { len = s_puffer_p;
          if (len > laenge_puf - 1) { len = laenge_puf - 1; }
          memset(&seite[s_p][0],(char)0,laenge_puf);
          seitep[s_p] = strncpy(&seite[s_p][0],&s_puffer[0],len);
          s_p++;
        }
        /* else: page table full, the line is dropped */
        enthalten = b;
      }
      s_puffer_p = 0;
    }
  }

/* a_outchar: append the character c to the line buffer s_puffer.
   When the buffer position reaches laenge_puf the line is handed to
   a_outimage() first; afterwards the buffer is NUL-terminated at the
   current position. */
void a_outchar(c) char c;
  { int at = s_puffer_p++;

    s_puffer[at] = c;
    if (!(s_puffer_p < laenge_puf))
    { a_outimage(); }
    s_puffer[s_puffer_p] = '\0';
  }

/* skip: read and discard kk input bytes (nothing is read for kk <= 0). */
void skip(int kk)
  { int left = kk;

    while (left >= 1)
    { e_inbyte();
      left--;
    }
  }

/* umlaut: write a double quote to the line buffer (main calls this for
   command byte 127). */
void umlaut()
  {
    a_outchar('"');
  }

/* set_char_k: write the character with the given code to the line
   buffer. */
void set_char_k(unsigned char code)
  { char ch = (char) code;

    a_outchar(ch);
  }

char special_char(unsigned char k)
  {
/* Sonderzeichen fuer "von" "bis" (--) und Gedankenstrich (---)
   sollen in der Form angezeigt werden.
*/
    if ((char) k == '{') {a_outchar('-'); a_outchar('-'); return true;}
    if ((char) k == '|') {a_outchar('-'); a_outchar('-'); a_outchar('-');
                          return true;}
    if (strcmp(font_code, font_code1) == 0)
    { if      (k == 11) {a_outchar('f'); a_outchar('f'); return true;}
      else if (k == 12) {a_outchar('f'); a_outchar('i'); return true;}
      else if (k == 13) {a_outchar('f'); a_outchar('l'); return true;}
      else if (k == 14) {a_outchar('f'); a_outchar('f'); a_outchar('i');
                         return true;}
      else if (k == 15) {a_outchar('f'); a_outchar('f'); a_outchar('l');
                         return true;}
      else if (k == 25) {a_outchar('"'); a_outchar('s'); return true;}
      else return false;
    }
    else if (strcmp(font_code,  font_code2) == 0)
    { if      (k == 27) {a_outchar('f'); a_outchar('f'); return true;}
      else if (k == 28) {a_outchar('f'); a_outchar('i'); return true;}
      else if (k == 29) {a_outchar('f'); a_outchar('l'); return true;}
      else if (k == 30) {a_outchar('f'); a_outchar('f'); a_outchar('i');
                         return true;}
      else if (k == 31) {a_outchar('f'); a_outchar('f'); a_outchar('l');
                         return true;}
      else if (k == 255) {a_outchar((char)223); return true;}
      else return false;
    }
    else return false;
  }

/* set_put: read hj+1 bytes and write each of them to the line buffer,
   either expanded by special_char() or unchanged. */
void set_put(int hj)
  { int remaining;
    unsigned char byte;

    for (remaining = hj + 1; remaining >= 1; remaining--)
    { byte = e_inbyte();
      if (special_char(byte) != true)
      { a_outchar((char) byte); }
    }
  }

/* bop: handle the begin-of-page command.
 *
 * Flushes the pending line, sets vorschub, reads a four-byte number (most
 * significant byte first), skips the following 40 bytes and stores a marker
 * line consisting of the command byte, a blank and that number.
 * test_neue_seite() later finds the marker through the command byte.  The
 * marker is stored regardless of the hyphen test in a_outimage(); the flag
 * enthalten is restored afterwards.
 *
 * The number is a long and is now formatted with %ld (it was %d).
 */
void bop(unsigned char code)
  { char b;
    long hn,hw; unsigned  char hk;

    a_outimage();
    vorschub = true;
    hw = (long) e_inbyte();
    for (hn=2; hn <= 4; hn++)
    { hk = e_inbyte();
      hw = hw * 256 + (long) hk;
    }
    skip(40);                      /* remaining nine 4-byte counters and the
                                      4-byte pointer                      */
    sprintf(&s_puffer[0],"%c %ld",(char) code, hw);
    s_puffer_p = strlen(&s_puffer[0]);
    b = enthalten; enthalten = true;     /* force a_outimage to store it */
    a_outimage();
    enthalten = b;
  }

void test_neue_seite()
  { char * pp;
    pp = strchr(seitep[s_p], (char) 139);
    if (pp != NULL)
    { if (np >= 15) {printf("\n");np = 0;}
      printf("[%s]",pp+2); np = np+1;
    }
  }

/* gefunden: does str end in a single hyphen?
 *
 * Returns true when the last character is '-' and the one before it exists
 * and is not '-'.  Returns false otherwise; this includes a string ending
 * in "--", a string consisting of only "-", and the empty string.  The
 * empty string used to read the byte in front of the buffer.
 */
char gefunden(char *str)
  { size_t n = strlen(str);

    if (n == 0)             { return false; }
    if (str[n-1] != '-')    { return false; }
    if ((n > 1) && (str[n-2] != '-')) { return true; }
    return false;
  }

/* wortanfang: copy the last blank-delimited word of str into the global
 * buffer wortanf and return that buffer.
 *
 * Special cases kept from the original: a string consisting of only "-"
 * gives an empty result, and any other one-character string gets a '-'
 * appended.
 */
char *wortanfang(char *str)
  { char suffix[2] = {(char)0, (char)0};
    int len, start;

    len = strlen(str);
    start = len;

    if ((len == 1) && (str[0] == '-'))
    { /* lone hyphen: nothing is copied */ }
    else
    { if (len == 1) { suffix[0] = '-'; }
      /* walk back to the character after the last blank */
      while ((start > 0) && (str[start-1] != ' '))
      { start--; }
    }

    memset(&wortanf[0], (char)0, laenge_puf);
    strncpy(&wortanf[0], &str[start], len-start);
    return strcat(&wortanf[0], &suffix[0]);
  }

/* wortende: copy the first blank-delimited word of str (leading blanks are
 * skipped) into the global buffer wortend and return that buffer.  The
 * result is empty when str is empty or contains only blanks.
 *
 * The copy length is now the length of the word itself.  Before, the end
 * index was used as length, so a word preceded by blanks was copied
 * together with some of the text following it.  The length is also limited
 * to laenge_puf-1 so the result stays NUL-terminated.
 */
char *wortende(char *str)
  { int len, first, last, count;

    len = strlen(str);
    first = 0;
    while ((first < len) && (str[first] == ' ')) { first++; }
    last = first;
    while ((last < len) && (str[last] != ' ')) { last++; }

    count = last - first;
    if (count > laenge_puf - 1) { count = laenge_puf - 1; }

    memset(&wortend[0],(char)0,laenge_puf);
    return strncpy(&wortend[0],&str[first],count);
  }

/* f_set_pos: write k blanks to the output file a_file (none for k <= 0). */
void f_set_pos(int k)
  { int left = k;

    while (left >= 1)
    { fputc(' ', a_file);
      left--;
    }
  }

/* set_pos: write k blanks to standard output (none for k <= 0). */
void set_pos(int k)
  { int left = k;

    while (left >= 1)
    { putchar(' ');
      left--;
    }
  }

/* strip: remove trailing blanks from str in place and return str. */
char *strip(char *str)
  { char *end = str + strlen(str);

    while ((end > str) && (end[-1] == ' '))
    { end--;
      *end = (char)0;
    }
    return str;
  }

char vergl_test(char * tcp)
  { int p1;
    p1 = 0;
    while ( p1 <= hck_max)
    { if (strcmp(tcp, &hck[p1][0]) == 0) {anzgef++; return true;}
      p1++;
    }
    p1 = 0;
    while ( p1 <= neu_max)
    { if (strcmp(tcp, &neu[p1][0]) == 0) {anzgef++; return true;}
      p1++;
    }
    neu_max++;
    if (neu_max >= laenge_neu) {} else
      strcpy(&neu[neu_max][0],tcp);
    return false;
  }

char zugelassen(char sta, unsigned char c)
  { int rc; char b;
    if ((c == '-') || (isalpha(c)>0)) return true;
    rc = (unsigned char) c;
/*  Nationals nach DIN 66 303, Tab 1; `A bis ''y, ohne \times und \div.*/
    if (font_code == font_code1)
       {if ((rc >= 192) && (rc != 215) && (rc != 247)) {return true;};}
     if (font_code == font_code2)
/*  Cork-Tabelle, Sept 1990  */
     { if ((rc >= 128) && (rc != 159)
                       && (rc != 189)
                       && (rc != 190)
                       && (rc != 191)) {return true;}
     }
     if (sta == true) { if (c == '"') {return true;}};
     return false;
  }

/* text_clear: reduce str to its word characters.
 *
 * Trailing characters that zugelassen(false, ...) rejects are cut off str
 * itself.  Leading characters that zugelassen(true, ...) rejects are
 * skipped; their number is stored in *p.  The remaining text is copied into
 * the global buffer hh1, which is returned.
 *
 * The copy is now limited to the size of hh1 so that a long input cannot
 * overrun it, and the string length is tracked instead of being recomputed
 * in every loop step.
 */
char* text_clear(str, p) char * str; int* p;
  { int n;
    size_t len, keep;

    len = strlen(str);
    while ((len > 0) && (zugelassen(false, str[len-1]) == false))
    { len--;
      str[len] = (char) 0;
    }

    n = 0;
    while (((size_t) n < len) && (zugelassen(true, str[n]) == false))
    { n++; }
    *p = n;

    keep = len - (size_t) n;
    if (keep > sizeof hh1 - 1) { keep = sizeof hh1 - 1; }

    memset(&hh1[0],(char)0,sizeof hh1);
    return strncpy(&hh1[0], &str[n], keep);
  }

/* eop: evaluate the lines collected for the page that just ended.
 *
 * Line 0 of seite[] is only checked for the page marker.  From line 1 on,
 * every line ending in a single hyphen (gefunden) is joined with the first
 * word of the following line.  The joined word is cleaned by text_clear()
 * and, unless vergl_test() already knows it, written to a_file (cleaned)
 * and to standard output (as joined).  When the hyphenated line is the last
 * one of the page, the word start alone is reported.  s_p is reset to 0 at
 * the end so the next page starts with an empty table.
 *
 * Changes against the former version:
 *  - gef, ttap and ntc are initialised; they were read uninitialised when a
 *    page had fewer than two stored lines, and ntc also when the first
 *    hyphenation of a page was on its last line;
 *  - ntc is reset for every hyphenation, so the stand-alone word start at
 *    the end of a page is no longer indented by the offset of an earlier
 *    word;
 *  - the joined word is built in a local buffer with bounded
 *    concatenation, and the copy into hh is bounded, instead of strcat()
 *    into wortanf and strcpy() into hh.
 */
void eop()
  { char gef = false;
    char joined[2 * 250];              /* word start plus word end */
    char *ttp, *ttap = NULL, *tgesp, *tcp;
    int ttapl, ntc = 0;

    a_outimage();
    s_max = s_p-1;
    s_p = 0; if (s_p > s_max) {goto hinter_schl;}
    test_neue_seite();
    ttaltp = seitep[s_p];
    s_p = 1; if (s_p > s_max) {goto hinter_schl;}
    ttaltp = seitep[s_p];
    test_neue_seite();
    while  (s_p <= s_max)
    { ttp = strip(seitep[s_p]);
      gef = gefunden(ttp);
      if (gef != true)
      { /* ordinary line: go on to the next one */
        s_p++; if (s_p > s_max) goto hinter_schl;
        test_neue_seite();
        continue;
      }

      ttap = wortanfang(ttp);
      ntc = 0;
      gesamt++;
      ttapl = strlen(ttap);
      if (mitte-ttapl > 1) {pos1 = mitte-ttapl;} else pos1 = 1;
      ttaltp = ttp;
      s_p++; if (s_p > s_max) goto hinter_schl;
      test_neue_seite();
      ttp = strip(seitep[s_p]);

      joined[0] = (char)0;
      strncat(joined, ttap, sizeof joined - 1);
      strncat(joined, wortende(ttp), sizeof joined - 1 - strlen(joined));
      tgesp = joined;

      hh[0] = (char)0;
      strncat(&hh[0], tgesp, sizeof hh - 1);
      tcp = text_clear(&hh[0],&ntc);
      if (vergl_test(tcp) == false)
      { if (vorschub == true) {printf("\n"); vorschub = false;}
        f_set_pos(pos1+ntc); fprintf(a_file,"%s\n",tcp);
        set_pos(pos1); printf("%s\n",tgesp);
      }
      /* the line just used as continuation is examined next */
    }
hinter_schl:
    if ((gef == true) && (ttap != NULL))
    { if (vergl_test(ttap) == false)
      { f_set_pos(pos1+ntc); fprintf(a_file,"%s\n",ttap);
        set_pos(pos1); printf("%s\n",ttap);
      }
    }
    s_p = 0;
  }

/* pow: integer power, b raised to p.
   Returns 1 for p <= 0 (the former version returned b in that case).
   The result is an int; the caller has to keep it in range. */
int pow(int b,int p)
  { int result = 1;
    int i;

    for (i = 0; i < p; i++)
    { result = result * b; }
    return result;
  }

/* eingabe: read a k-byte signed number (most significant byte first, first
 * byte above 127 means negative) from the input and store it in *dd.
 *
 * The value is now accumulated in a long with the sign taken from the first
 * byte right away.  The former version accumulated in an int and subtracted
 * pow(2, 8*k) afterwards; for k == 4 both steps overflowed the int.  For
 * k <= 3 the result is unchanged.
 */
void eingabe(dd, k) long* dd; unsigned char k;
  { long value;
    int hn;

    value = (long) e_inbyte();
    if (value > 127) { value = value - 256; }     /* sign of the number */
    for (hn = 2; hn <= (int) k; hn++)
    { value = value * 256 + (long) e_inbyte();
    }
    *dd = value;
  }

/* right: read the (code-142)-byte amount into d_r; emit a blank when it
   exceeds r_grenze and flush the line when it exceeds r_grenze_zeile. */
void right(unsigned char code)
  { int nbytes = code - 142;

    eingabe(&d_r, nbytes);
    if (d_r > r_grenze)
    { a_outchar(' '); }
    if (d_r > r_grenze_zeile)
    { a_outimage(); }
  }

/* w0: apply the stored amount d_w again: blank when it exceeds w_grenze,
   line flush when its absolute value exceeds w_grenze_zeile.
   d_w is a long, so labs() is used instead of abs().
   NOTE(review): w() compares d_w itself, without the absolute value;
   confirm which of the two is intended. */
void w0()
  { if (d_w > w_grenze) a_outchar(' ');
    if (labs(d_w) > w_grenze_zeile) a_outimage();
  }

/* w: read the (code-147)-byte amount into d_w; emit a blank when it
   exceeds w_grenze and flush the line when it exceeds w_grenze_zeile. */
void w(unsigned char code)
  { int nbytes = code - 147;

    eingabe(&d_w, nbytes);
    if (d_w > w_grenze)
    { a_outchar(' '); }
    if (d_w > w_grenze_zeile)
    { a_outimage(); }
  }

/* x0: apply the stored amount d_x again: blank when it exceeds x_grenze,
   line flush when its absolute value exceeds x_grenze_zeile.
   d_x is a long, so labs() is used instead of abs().
   NOTE(review): x() compares d_x itself, without the absolute value;
   confirm which of the two is intended. */
void x0()
  { if (d_x > x_grenze) a_outchar(' ');
    if (labs(d_x) > x_grenze_zeile) a_outimage();
  }

/* x: read the (code-152)-byte amount into d_x; emit a blank when it
   exceeds x_grenze and flush the line when it exceeds x_grenze_zeile. */
void x(unsigned char code)
  { int nbytes = code - 152;

    eingabe(&d_x, nbytes);
    if (d_x > x_grenze)
    { a_outchar(' '); }
    if (d_x > x_grenze_zeile)
    { a_outimage(); }
  }

/* down: read the (code-156)-byte amount into d_v and flush the line when
   it exceeds v_grenze_zeile. */
void down(unsigned char code)
  { int nbytes = code - 156;

    eingabe(&d_v, nbytes);
    if (d_v > v_grenze_zeile)
    { a_outimage(); }
  }

/* y0: apply the stored amount d_y again: blank when it exceeds y_grenze,
   line flush when its absolute value exceeds y_grenze_zeile.
   d_y is a long, so labs() is used instead of abs().
   NOTE(review): y() compares d_y itself, without the absolute value;
   confirm which of the two is intended. */
void y0()
  { if (d_y > y_grenze) a_outchar(' ');
    if (labs(d_y) > y_grenze_zeile) a_outimage();
  }

/* y: read the (code-161)-byte amount into d_y; emit a blank when it
   exceeds y_grenze and flush the line when it exceeds y_grenze_zeile. */
void y(unsigned char code)
  { int nbytes = code - 161;

    eingabe(&d_y, nbytes);
    if (d_y > y_grenze)
    { a_outchar(' '); }
    if (d_y > y_grenze_zeile)
    { a_outimage(); }
  }

/* z0: apply the stored amount d_z again: blank when it exceeds z_grenze,
   line flush when its absolute value exceeds z_grenze_zeile.
   d_z is a long, so labs() is used instead of abs().
   NOTE(review): z() compares d_z itself, without the absolute value;
   confirm which of the two is intended. */
void z0()
  { if (d_z > z_grenze) a_outchar(' ');
    if (labs(d_z) > z_grenze_zeile) a_outimage();
  }

/* z: read the (code-166)-byte amount into d_z; emit a blank when it
   exceeds z_grenze and flush the line when it exceeds z_grenze_zeile. */
void z(unsigned char code)
  { int nbytes = code - 166;

    eingabe(&d_z, nbytes);
    if (d_z > z_grenze)
    { a_outchar(' '); }
    if (d_z > z_grenze_zeile)
    { a_outimage(); }
  }

/* xxx: skip over an xxx command.  A length of code-238 bytes (at least one
   byte is always read), most significant byte first, is followed by that
   many bytes, which are discarded. */
void xxx(unsigned char code)
  { long length;
    long left;

    length = (int) e_inbyte();
    for (left = (long) code - 239; left > 0; left--)
    { long next = (int) e_inbyte();
      length = length * 256 + next;
    }
    skip(length);
  }

/* fnt_def: skip over a font definition.
 *
 * code-230 bytes are skipped first (the font number of code-242 bytes plus
 * three 4-byte fields), then the two length bytes a and l are read and the
 * font name that follows is skipped.
 *
 * The name is a+l bytes long in the DVI format; the former skip(2*a+l) read
 * too far whenever a was not zero.
 */
void fnt_def(unsigned char code)
  { long a_len, l_len;

    skip(code - 230);                       /* font number and 12 bytes  */
    a_len = (int) e_inbyte();               /* read a[1]                 */
    l_len = (int) e_inbyte();               /* read l[1]                 */
    skip(a_len + l_len);                    /* skip the name, n[a+l]     */
  }

/* pre: skip over the preamble.  13 bytes are skipped, then a length byte
   is read and that many further bytes are skipped. */
void pre()
  { long comment_len;

    skip(13);
    comment_len = (int) e_inbyte();
    skip(comment_len);
  }

/* copy_arg: copy a command-line argument into a fixed-size buffer.
   reserve is the number of bytes that must stay free behind the copy (for
   an extension added later).  An argument that does not fit ends the
   program with a message instead of overrunning the buffer. */
static char *copy_arg(char *dst, size_t dstsize, const char *src,
                      size_t reserve)
  { if (strlen(src) + reserve >= dstsize)
    { printf("\nargument too long: %s\n", src);
      exit(1);
    }
    return strcpy(dst, src);
  }

/* main: hyphen_show [-c coding] dvifile [wordfile]
 *
 * Reads the DVI file (".dvi" is appended when the name does not end in it),
 * writes the hyphenated words it finds to standard output and to a file
 * with the extension ".hyp", and prints two counters at the end.  Words
 * contained in the optional word file are not listed.
 *
 * Changes against the former version:
 *  - explicit int return type;
 *  - arguments are length-checked before they are copied;
 *  - the default coding is also stored in font_code, the array
 *    special_char() compares against;
 *  - ".dvi" is recognised as a suffix only, not anywhere in the name;
 *  - open() failure is detected as a negative result;
 *  - a failing fopen() of the output file is reported;
 *  - words from the word file are read with a length limit, the file is
 *    closed after reading, and hck_max stays inside hck[] when the file
 *    has too many words;
 *  - the long counters are printed with %ld.
 */
int main(int n_arg, char *v_arg[])
  { size_t len;

    font_codep = strcpy(&font_code[0], font_code2);
    verdateip = NULL;
    if (setvbuf(stdout,NULL,_IOLBF,0) != 0)
      { perror("setvbuf"); exit(1);}
    printf("\nhyphen_show V.C01/5.6.97\n");

    if (n_arg <= 1)
    { printf("missing file name; exit\n");
      goto schluss;
    }
    if ((n_arg >= 3) && (strcmp(v_arg[1],"-c") == 0))
    { font_codep = copy_arg(&font_code[0], sizeof font_code, v_arg[2], 0);
      if (n_arg == 3)
      { printf("missing file name; exit\n");
        goto schluss;
      }
      hauptdateip = copy_arg(&hauptdatei[0], sizeof hauptdatei, v_arg[3], 4);
      if (n_arg >= 5)
      { verdateip = copy_arg(&verdatei[0], sizeof verdatei, v_arg[4], 0);}
    }
    else
    { hauptdateip = copy_arg(&hauptdatei[0], sizeof hauptdatei, v_arg[1], 4);
      if (n_arg >= 3)
      { verdateip = copy_arg(&verdatei[0], sizeof verdatei, v_arg[2], 0);}
    }

    /* room for the extension was reserved by copy_arg */
    len = strlen(hauptdateip);
    if ((len <= 4) || (strcmp(hauptdateip + len - 4, ".dvi") != 0))
    { hauptdateip = strcat(hauptdateip,".dvi");
      len = len + 4;
    }

    /* output name: input name with ".dvi" replaced by ".hyp" */
    memcpy(&aus_dat[0], hauptdateip, len - 4);
    aus_dat[len - 4] = (char)0;
    aus_datp = strcat(&aus_dat[0],".hyp");

    filed1=open(hauptdateip,O_RDONLY);
    if (filed1 < 0) {printf("\ninput file not found\n");goto schluss;}

    a_file=fopen(aus_datp,"w+");
    if (a_file == NULL)
    { printf("\ncannot open output file: %s\n",aus_datp);
      close(filed1);
      goto schluss;
    }

    if (verdateip != NULL)
    { ver_dat = fopen(verdateip,"r");
      if (ver_dat == NULL) {printf("\nfile not found: %s\n",verdateip);}
      else
      { hck_p = 0;
        fscanf(ver_dat,"%79s", & hck[hck_p][0]);
        while (feof(ver_dat) == 0)
        { hck_p++;
          if (hck_p >= laenge_hck)
          { printf("\nwarning: more than %d lines in file %s\n",
                    laenge_hck,verdateip);
            hck_p = laenge_hck - 1;        /* last valid row of hck[] */
            break;
          }
          fscanf(ver_dat,"%79s", & hck[hck_p][0]);
        }
        hck_max = hck_p;
        fclose(ver_dat);
      }
    }

    printf("\n\ncoding: %s\n",font_codep);
    printf("same words are given only once\n\n");

    s_puffer_p = 0; s_p = 0; enthalten = false;
    anzgef = 0; neu_max = 0; np = 0;

    k =  e_inbyte();
    while (e_endfile == false)
    { if      (k == 0) {;}                      /* set_char_0               */
      else if (special_char(k) == true) {;}     /* ligature, sharp s, -- ---*/
      else if (k <= 126) {set_char_k(k);}       /* output printable char    */
      else if (k == 127) {umlaut();}            /* umlaut sign becomes "    */
      else if (k <= 131) {set_put(k-128);}      /* set1 to set4             */
      else if (k == 132) {skip(8);}             /* set_rule                 */
      else if (k <= 136) {set_put(k-133);}      /* put1 to put4             */
      else if (k == 137) {skip(8);}             /* put_rule                 */
      else if (k == 138) {;}                    /* nop (no operation)       */
      else if (k == 139) {bop(k);}              /* begin of page            */
      else if (k == 140) {eop();}               /* end of page              */
      else if (k == 141) {;}                    /* push                     */
      else if (k == 142) {;}                    /* pop                      */
      else if (k <= 146) {right(k);}            /* right1 to right4         */
      else if (k == 147) {w0();}                /* w0                       */
      else if (k <= 151) {w(k);}                /* w1 to w4                 */
      else if (k == 152) {x0();}                /* x0                       */
      else if (k <= 156) {x(k);}                /* x1 to x4                 */
      else if (k <= 160) {down(k);}             /* down1 to down4           */
      else if (k == 161) {y0();}                /* y0                       */
      else if (k <= 165) {y(k);}                /* y1 to y4                 */
      else if (k == 166) {z0();}                /* z0                       */
      else if (k <= 170) {z(k);}                /* z1 to z4                 */
      else if (k <= 234) {;}                    /* font selections          */
      else if (k <= 238) {skip(k-234);}         /* fnt1 to fnt4             */
      else if (k <= 242) {xxx(k);}              /* xxx1 to xxx4             */
      else if (k <= 246) {fnt_def(k);}          /* font definitions         */
      else if (k == 247) {pre();}               /* preamble                 */
      else if (k == 248) {break;}               /* post, post_post not read */
      ;
      k =  e_inbyte();
    };

    close(filed1);
    fclose(a_file);

    printf("\nnumber of hyphenations:%5ld\n",gesamt);
    printf("already known:         %5ld\n",anzgef);
schluss: exit(0);
  }

