/*
This is an Optical-Character-Recognition program
Copyright (C) 2000  Joerg Schulenburg

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

 Joerg.Schulenburg@physik.uni-magdeburg.de

  Some comments are written in German, sorry for that.

 This file was retrieved from pgm2asc.cc of Joerg, in order to have
 a library of the ocr-engine from Klaas Freitag <freitag@suse.de>
 
 */
#include <config.h>
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include <string.h>
#include <sys/time.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif

#include "pnm.h"
#include "pgm2asc.h"
#include "pcx.h"
#include "ocr0.h"		/* only_numbers */

/* Release version of gocr as a string literal; it is concatenated into the
   banner printed by out_version().
   The following line is used to name the package (make dist). */
#define version_string "0.3.4"

static void out_version(int v) {
  fprintf(stderr, " Optical Character Recognition --- gocr " version_string "\n");
  if (v)
    fprintf(stderr, " use option -h for help\n");
  if (v & 2)
    exit(1);
  return;
}

/*
 * Print the version banner followed by the full usage text to stderr,
 * then terminate the process with exit status 0. Never returns.
 *
 * The list of output formats shown for -f must be kept in sync with the
 * format names accepted by the option parser in main()
 * (ISO8859_1, TeX, HTML, SGML).
 */
static void help(void) {
  out_version(0);
  fprintf(stderr,
          " using: gocr [options] pnm_file_name  # use - for stdin\n"
          " options:\n"
          " -h        - get this help\n"
          " -i name   - input image file (pnm,pgm,pbm,ppm,pcx,...)\n"
          " -i -      - read PNM from stdin (djpeg -gray a.jpg | gocr -)\n"
          " -o name   - output file  (redirection of stdout)\n"
          " -e name   - logging file (redirection of stderr)\n"
          " -p name   - database path (including final slash, default is ./db/)\n"
          " -f fmt    - output format (ISO8859_1 TeX HTML SGML)\n"
          " -l num    - grey level 0<160<=255\n"
          " -d num    - dust_size (remove all smaller clusters, 10=default)\n"
          " -s num    - spacewidth/dots (0 = autodetect)\n"
          " -v num    - verbose  [summed]\n"
          "      1      print more info\n"
          "      2      list shapes  of boxes (see -c)\n"
          "      4      list pattern of boxes (see -c)\n"
          "      8      print pattern after recognition\n"
          "     16      print line infos\n"
          "     32      debug outXX.pgm\n"
          " -c string - list of chars (_ = not recognized chars)\n"
          " -m num    - operation modes, ~ = switch off\n"
          "      2      use database (early development)\n"
          "      4      layout analysis, zoning (development)\n"
          "      8      ~ compare non recognized chars\n"
          "     16      ~ divide overlapping chars\n"
          "     32      ~ context correction\n"
          "     64      char packing (development)\n"
          "    130      extend database, prompts user (128+2, early development)\n"
          " -n   1      only numbers\n"
          " examples:\n"
          "\tgocr -v 33 text1.pbm                # some infos + out30.bmp\n"
          "\tgocr -v 7 -c _YV text1.pbm          # list unknown, Y and V chars\n"
          "\tdjpeg -pnm -gray text.jpg | gocr -  # use jpeg-file via pipe\n"
          "\n");
  exit(0);
}

/*
 * Subtract two timevals: *result = *x - *y.
 * (Algorithm taken from the glibc documentation.)
 *
 * Note: *y is modified in place. Its fields are normalised so that the
 * microsecond difference x->tv_usec - y->tv_usec ends up non-negative
 * before the field-wise subtraction is done.
 *
 * Returns 1 if the difference is negative, otherwise 0.
 */
static int timeval_subtract (struct timeval *result, struct timeval *x,
    struct timeval *y) {
  enum { USEC_PER_SEC = 1000000 };

  /* y has more microseconds than x: borrow whole seconds by moving them
     from y's microsecond field into its seconds field. */
  if (y->tv_usec > x->tv_usec) {
    int borrow = (y->tv_usec - x->tv_usec) / USEC_PER_SEC + 1;
    y->tv_sec += borrow;
    y->tv_usec -= USEC_PER_SEC * borrow;
  }

  /* Microsecond gap larger than one second: shift the surplus whole
     seconds the other way. */
  if (x->tv_usec - y->tv_usec > USEC_PER_SEC) {
    int surplus = (x->tv_usec - y->tv_usec) / USEC_PER_SEC;
    y->tv_sec -= surplus;
    y->tv_usec += USEC_PER_SEC * surplus;
  }

  /* Field-wise difference; after the adjustments above the microsecond
     part is not negative. */
  result->tv_sec = x->tv_sec - y->tv_sec;
  result->tv_usec = x->tv_usec - y->tv_usec;

  /* Negative result <=> x lies before the (adjusted) y. */
  return (x->tv_sec < y->tv_sec) ? 1 : 0;
}

/* -------------------------------------------------------------
// ------   MAIN - replace this by your own application!
// ------------------------------------------------------------- */
/*
 * Command-line front end of gocr.
 *
 * Steps:
 *   1. parse the options (see help() for the list); every option except -h
 *      consumes the following argument, which is taken as "" if missing
 *   2. read the input image (readpcx() if the name contains ".pcx",
 *      otherwise readpnm()/readpgm() depending on HAVE_PNM_H)
 *   3. run the engine via pgm2asc()
 *   4. print every line returned by getTextLine() to stdout
 *   5. if verbose is set, print the elapsed time
 *
 * Returns 0; help() and out_version() may terminate the process earlier.
 */
int main(int argn, char *argv[]) {
  int i, cs = 0, spc = 0, mo = 0, dust_size = 10, numo = 0;
  int verbose = 0;
  FORMAT out_format = ISO8859_1;
  int linecounter = 0;
#ifdef USE_UNICODE
  const wchar_t *line, *c;
#else
  const char *line;
#endif
  pix p;
  char *inam = "-", *lc = "_", *s1;
  struct timeval init = {0, 0};  /* start time, only set if verbose */

  env.db_path = (char *)NULL;   /* initial value */
  setvbuf(stdout, (char *) NULL, _IONBF, 0);  /* not buffered */
  if (argn <= 1) {
    out_version(1);
    exit(0);
  }
#ifdef HAVE_PGM_H
  pnm_init(&argn, &argv);
#endif

  /* process arguments */
  for (i = 1; i < argn; i++) {
    if (strcmp(argv[i], "--help") == 0)
      help(); /* and quits */

    /* anything not starting with '-', or a lone "-" (stdin), is taken
       as the input file name (v0.2.5) */
    if (argv[i][0] != '-' || argv[i][1] == '\0') {
      inam = argv[i];
      continue;
    }

    /* s1 is the option's argument; "" if the option is the last word,
       so the cases below never touch argv[argn] (which is NULL) */
    s1 = "";
    if (i + 1 < argn)
      s1 = argv[i + 1];

    switch (argv[i][1]) {
    case 'h': /* help */
      help();
      break;
    case 'i': /* input image file */
      inam = s1;
      i++;
      break;
    case 'e': /* logging file */
      if (s1[0] == '-' && s1[1] == '\0') {
#ifdef HAVE_UNISTD_H
        dup2(STDOUT_FILENO, STDERR_FILENO); /* -e /dev/stdout  works */
#else
        fprintf(stderr, "stderr redirection not possible without unistd.h\n");
#endif
      }
      else if (!freopen(s1, "w", stderr)) {
        fprintf(stderr, "stderr redirection to %s failed\n", s1);
      }
      i++;
      break;
    case 'p': /* database path */
      env.db_path = s1;
      i++;
      break;
    case 'o': /* output file */
      if (s1[0] == '-' && s1[1] == '\0') {  /* default */
      }
      else if (!freopen(s1, "w", stdout)) {
        fprintf(stderr, "stdout redirection to %s failed\n", s1);
      }
      i++;
      break;
    case 'f': /* output format */
      /* use s1 rather than argv[i+1]: the latter is NULL when -f is the
         last argument and must not be passed to strcmp() */
      if (strcmp(s1, "ISO8859_1") == 0)
        out_format = ISO8859_1;
      else if (strcmp(s1, "TeX") == 0)
        out_format = TeX;
      else if (strcmp(s1, "HTML") == 0)
        out_format = HTML;
      else if (strcmp(s1, "SGML") == 0)
        out_format = SGML;
      else
        fprintf(stderr, "# unknown output format %s, use -h for help\n", s1);
      i++;
      break;
    case 'c': /* list of chars (_ = not recognized chars) */
      lc = s1;
      i++;
      break;
    case 'd': /* dust size */
      dust_size = atoi(s1);
      i++;
      break;
    case 'l': /* grey level 0<160<=255 */
      cs = atoi(s1);
      i++;
      break;
    case 's': /* spacewidth/dots (0 = autodetect) */
      spc = atoi(s1);
      i++;
      break;
    case 'v': /* verbose mode, flags are or-ed together */
      verbose |= atoi(s1);
      i++;
      break;
    case 'm': /* operation modes, flags are or-ed together */
      mo |= atoi(s1);
      i++;
      break;
    case 'n': /* numbers only */
      numo = atoi(s1);
      i++;
      break;
    default:
      fprintf(stderr, "# unknown option use -h for help\n");
    }
  }

  if (!cs) /* default grey level */
    cs = 128 + 32;

  if (verbose) {
    fprintf(stderr, "#");
    out_version(0);
    fprintf(stderr,
            "# options are: -l %d -s %d -v %d -c %s -m %d -d %d -n %d %s\n",
            cs, spc, verbose, lc, mo, dust_size, numo, inam);
#ifdef USE_UNICODE
    fprintf(stderr,"# using unicode\n");
#endif
    gettimeofday(&init, NULL);
  }

  set_only_numbers(numo);

  /* ----- read picture */
  if (strstr(inam, ".pcx"))
    readpcx(inam, &p, verbose);
  else
#ifdef HAVE_PNM_H
    readpnm(inam, &p, verbose);
#else
    readpgm(inam, &p, verbose);
#endif

  /* separation of main and rest for using as lib
     this will be changed later => introduction of set_option()
     for better communication to the engine  */

  /* call main loop */
  pgm2asc(&p, mo, cs, spc, dust_size, lc, verbose);

  /* get/print output: getTextLine() is polled with increasing line
     numbers until it returns a null pointer */
  linecounter = 0;
  line = getTextLine(linecounter++);
  while (line) {
#ifdef USE_UNICODE
    /* convert each wide character to the selected output format */
    c = line;
    while (*c != 0x0000) {
      fputs((char *)decode(*c, out_format), stdout);
      c++;
    }
    fputc('\n', stdout);
#else
    printf("%s\n", line);
#endif
    line = getTextLine(linecounter++);
  }
  free_textlines();

  free( p.p );

  /* show elapsed time (minutes:seconds:milliseconds); note that this is
     written to stdout and therefore follows any -o redirection */
  if (verbose) {
    struct timeval end, result;
    gettimeofday(&end, NULL);
    timeval_subtract(&result, &end, &init);
    printf("Elapsed time: %d:%02d:%3.3f.\n", (int)result.tv_sec/60,
           (int)result.tv_sec%60, (float)result.tv_usec/1000);
  }

  return 0;
}
