/* PSPP - computes sample statistics.
   Copyright (C) 1997, 1998 Free Software Foundation, Inc.
   Written by Ben Pfaff <blp@gnu.org>.

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
   02111-1307, USA. */

#include <config.h>
#include <assert.h>
#include <stdlib.h>
#include "common.h"
#include "error.h"
#include "approx.h"
#include "file-handle.h"
#include "dfm.h"
#include "getline.h"
#include "misc.h"
#include "str.h"
#include "lexer.h"
#include "lexerP.h"
#include "var.h"
#include "command.h"
#include "vfm.h"

/* Defines the three types of complex files read by FILE TYPE.
   cmd_file_type() stores one of these in file_type_pgm.type
   according to the keyword that follows FILE TYPE. */
enum
  {
    FTY_MIXED,			/* FILE TYPE MIXED. */
    FTY_GROUPED,		/* FILE TYPE GROUPED. */
    FTY_NESTED			/* FILE TYPE NESTED. */
  };

/* Limited variable column specifications, as filled in by
   parse_col_spec().  cmd_file_type() uses an empty NAME to mean
   that the corresponding subcommand has not been given. */
typedef struct
  {
    char name[9];		/* Variable name (room for 8 characters
				   plus the null terminator). */
    int fc, nc;			/* First column (1-based), # of columns. */
    int fmt;			/* Format type; used as an index into
				   formats[]. */
    variable *v;		/* Variable; set by create_col_var(). */
  }
col_spec;

/* RCT_* record type constants.  Each is a distinct single-bit mask
   (written in octal); they are OR'd together in record_type.flags.
   RCT_DUPLICATE and RCT_MISSING start out from the FILE TYPE
   settings and may be overridden on each RECORD TYPE command. */
enum
  {
    RCT_OTHER = 001,		/* 1=OTHER. */
    RCT_SKIP = 002,		/* 1=SKIP. */
    RCT_DUPLICATE = 004,	/* DUPLICATE: 0=NOWARN, 1=WARN. */
    RCT_MISSING = 010,		/* MISSING: 0=NOWARN, 1=WARN. */
    RCT_SPREAD = 020		/* SPREAD: 0=NO, 1=YES. */
  };

/* Represents a RECORD TYPE command.  Instances are chained into a
   singly linked list headed by file_type_pgm.recs_head, in the order
   in which the commands were parsed. */
typedef struct record_type record_type;
struct record_type
  {
    record_type *next;		/* Next record type in list, or NULL. */
    unsigned flags;		/* RCT_* constants. */
    value *v;			/* Vector of values for this record type.
				   When the RECORD format is a string
				   format, each element's `c' member points
				   to its own xmalloc'd buffer; otherwise
				   the `f' member holds the number.  NULL
				   when no values were given (e.g. OTHER). */
    int nv;			/* Length of vector V. */
    col_spec case_sbc;		/* CASE subcommand. */
    int ft, lt;			/* First, last transformation index.  FT is
				   n_trns when the command is parsed; LT is
				   set to n_trns - 1 when the next RECORD
				   TYPE or END FILE TYPE is parsed. */
  };				/* record_type */

/* Represents a FILE TYPE input program.  Does not contain a
   trns_header because it's never submitted as a transformation.
   All fields are (re)initialized by cmd_file_type(). */
typedef struct
  {
    int type;			/* One of the FTY_* constants. */
    file_handle *handle;	/* File handle of input file. */
    col_spec record;		/* RECORD subcommand. */
    col_spec case_sbc;		/* CASE subcommand. */
    int wild;			/* 0=NOWARN, 1=WARN. */
    int duplicate;		/* 0=NOWARN, 1=WARN, 2=CASE (CASE is
				   accepted for FILE TYPE NESTED only). */
    int missing;		/* 0=NOWARN, 1=WARN. */
    int ordered;		/* 0=NO, 1=YES. */
    int had_rec_type;		/* 1=Had a RECORD TYPE command.
				   RECORD TYPE must precede the first
				   DATA LIST. */
    record_type *recs_head;	/* List of record types. */
    record_type *recs_tail;	/* Last in list of record types. */
  }
file_type_pgm;

/* Current FILE TYPE input program.  There is only this one static
   instance; cmd_file_type() resets it each time FILE TYPE is
   parsed. */
static file_type_pgm fty;

/* Helpers for parsing a column specification and for creating the
   variable that it describes; both are defined below. */
static int parse_col_spec (col_spec *, const char *);
static void create_col_var (col_spec * c);

/* Parses FILE TYPE command.

   Resets the static FILE TYPE program `fty' to its defaults, parses
   the file type keyword (MIXED, GROUPED, or NESTED), and then parses
   the FILE, RECORD, CASE, WILD, DUPLICATE, MISSING, and ORDERED
   subcommands up to the terminating `.'.

   On success, makes the parsed file handle the default handle,
   installs file_type_source as the active case source, creates the
   RECORD variable (and the CASE variable, if one was given), and
   returns 1.  On failure, reports the problem and returns 0 or the
   value returned by msg() or syntax_error(). */
int
cmd_file_type (void)
{
  /* Initialize. */
  discard_variables ();
  fty.handle = inline_file;
  fty.record.name[0] = 0;
  fty.case_sbc.name[0] = 0;
  fty.wild = fty.duplicate = fty.missing = fty.ordered = 0;
  fty.had_rec_type = 0;
  fty.recs_head = fty.recs_tail = NULL;

  /* File type keyword.  GROUPED turns every warning on and assumes
     ordered records by default; MIXED and NESTED keep the zeroed
     defaults set above. */
  match_id (TYPE);
  if (match_id (MIXED))
    fty.type = FTY_MIXED;
  else if (match_id (GROUPED))
    {
      fty.type = FTY_GROUPED;
      fty.wild = 1;
      fty.duplicate = 1;
      fty.missing = 1;
      fty.ordered = 1;
    }
  else if (match_id (NESTED))
    fty.type = FTY_NESTED;
  else
    return msg (SE, _("MIXED, GROUPED, or NESTED expected."));

  /* Subcommands, in any order, until end of command. */
  while (token != '.')
    {
      if (match_id (FILE))
        {
          match_tok ('=');
          fty.handle = fh_parse_file_handle ();
          if (!fty.handle)
            return 0;
        }
      else if (match_id (RECORD))
        {
          match_tok ('=');
          if (!parse_col_spec (&fty.record, "####RECD"))
            return 0;
        }
      else if (match_id (CASE))
        {
          if (fty.type == FTY_MIXED)
            return msg (SE, _("The CASE subcommand is not valid on FILE TYPE "
                              "MIXED."));
          match_tok ('=');
          if (!parse_col_spec (&fty.case_sbc, "####CASE"))
            return 0;
        }
      else if (match_id (WILD))
        {
          match_tok ('=');
          if (match_id (WARN))
            fty.wild = 1;
          else if (match_id (NOWARN))
            fty.wild = 0;
          else
            return msg (SE, _("WARN or NOWARN expected after WILD."));
        }
      else if (match_id (DUPLICATE))
        {
          if (fty.type == FTY_MIXED)
            return msg (SE, _("The DUPLICATE subcommand is not valid on "
                              "FILE TYPE MIXED."));
          match_tok ('=');
          if (match_id (WARN))
            fty.duplicate = 1;
          else if (match_id (NOWARN))
            fty.duplicate = 0;
          else if (match_id (CASE))
            {
              if (fty.type != FTY_NESTED)
                return msg (SE, _("DUPLICATE=CASE is only valid on "
                            "FILE TYPE NESTED."));
              fty.duplicate = 2;
            }
          else
            return msg (SE, _("WARN%s expected after DUPLICATE."),
                        (fty.type == FTY_NESTED ? _(", NOWARN, or CASE")
                         : _(" or NOWARN")));
        }
      else if (match_id (MISSING))
        {
          if (fty.type == FTY_MIXED)
            return msg (SE, _("The MISSING subcommand is not valid on "
                              "FILE TYPE MIXED."));
          match_tok ('=');
          if (match_id (NOWARN))
            fty.missing = 0;
          else if (match_id (WARN))
            fty.missing = 1;
          else
            /* Worded like the WILD, DUPLICATE, and ORDERED messages. */
            return msg (SE, _("WARN or NOWARN expected after MISSING."));
        }
      else if (match_id (ORDERED))
        {
          if (fty.type != FTY_GROUPED)
            return msg (SE, _("ORDERED is only valid on FILE TYPE GROUPED."));
          match_tok ('=');
          if (match_id (YES))
            fty.ordered = 1;
          else if (match_id (NO))
            fty.ordered = 0;
          else
            return msg (SE, _("YES or NO expected after ORDERED."));
        }
      else
        return syntax_error (_("while expecting a valid subcommand"));
    }

  /* RECORD is always required.  GROUPED additionally requires a CASE
     specification whose variable name differs from RECORD's. */
  if (fty.record.name[0] == 0)
    return msg (SE, _("The required RECORD subcommand was not present."));
  if (fty.type == FTY_GROUPED)
    {
      if (fty.case_sbc.name[0] == 0)
        return msg (SE, _("The required CASE subcommand was not present."));
      if (streq (fty.case_sbc.name, fty.record.name))
        return msg (SE, _("CASE and RECORD must specify different variable "
                          "names."));
    }

  default_handle = fty.handle;

  /* Become the active case source and create the variables named by
     the RECORD and (optional) CASE specifications. */
  vfm_source = &file_type_source;
  create_col_var (&fty.record);
  if (fty.case_sbc.name[0])
    create_col_var (&fty.case_sbc);

  return 1;
}

/* Creates the variable described by col_spec C in default_dict and
   stores it into C->V.  A string format yields an ALPHA variable
   whose width is the number of columns in C; any other format yields
   a NUMERIC variable of width 0. */
static void
create_col_var (col_spec * c)
{
  const int is_string = (formats[c->fmt].cat & FCAT_STRING) != 0;

  if (is_string)
    c->v = force_create_variable (&default_dict, c->name, ALPHA, c->nc);
  else
    c->v = force_create_variable (&default_dict, c->name, NUMERIC, 0);
}

/* Parses variable, column, type specifications for a variable.

   The accepted form is an optional variable name, a first column,
   an optional `-' and last column, and an optional format name in
   parentheses.  Results are stored into C: the name (DEF_NAME when
   none is given), the 1-based first column, the number of columns
   (1 when no last column is given), and the format type (FMT_F when
   none is given).

   Returns the result of check_input_specifier() on the resulting
   format on success; otherwise reports the problem and returns 0 or
   the value returned by msg(). */
static int
parse_col_spec (col_spec *c, const char *def_name)
{
  fmt_spec spec;

  if (token == ID)
    {
      /* Bounded copy: C->NAME holds only sizeof c->name - 1 characters
         plus the terminator, so never write more than that even if
         the token text is longer.
         NOTE(review): the lexer may already truncate identifiers;
         confirm. */
      strncpy (c->name, tokstr, sizeof c->name - 1);
      c->name[sizeof c->name - 1] = '\0';
      get_token ();
    }
  else
    strcpy (c->name, def_name);

  /* First column. */
  force_int ();
  c->fc = tokint;
  if (c->fc < 1)
    return msg (SE, _("Column value must be positive."));
  get_token ();

  /* Optional last column; C->NC temporarily holds the last column
     and is then converted to a column count. */
  convert_negative_to_dash ();
  if (match_tok ('-'))
    {
      force_int ();
      c->nc = tokint;
      get_token ();

      if (c->nc < c->fc)
        return msg (SE, _("Ending column precedes beginning column."));
      c->nc -= c->fc - 1;
    }
  else
    c->nc = 1;

  /* Optional format name in parentheses. */
  if (match_tok ('('))
    {
      const char *cp;
      force_id ();
      c->fmt = parse_format_specifier_name (&cp, 0);
      if (c->fmt == -1)
        return 0;
      /* Anything left over after the format name is an error. */
      if (*cp)
        return msg (SE, _("Bad format specifier name."));
      get_token ();
      force_match (')');
    }
  else
    c->fmt = FMT_F;

  /* Validate the format against the field width. */
  spec.type = c->fmt;
  spec.w = c->nc;
  spec.d = 0;
  return check_input_specifier (&spec);
}

/* RECORD TYPE. */

/* Structure being filled in by internal_cmd_record_type.  On
   success its contents are copied into a newly allocated list node;
   on failure cmd_record_type() frees the value vector it holds. */
static record_type rct;

/* Does the actual parsing for cmd_record_type(). */
static int internal_cmd_record_type (void);

/* Parse the RECORD TYPE command.

   Delegates the parsing to internal_cmd_record_type() and returns
   its result.  When that result is zero, the values accumulated in
   `rct' are released here: the per-value string buffers (only when
   the RECORD format is a string format) and then the vector
   itself. */
int
cmd_record_type (void)
{
  const int ok = internal_cmd_record_type ();
  int idx;

  if (ok)
    return ok;

  if (formats[fty.record.fmt].cat & FCAT_STRING)
    for (idx = 0; idx < rct.nv; idx++)
      free (rct.v[idx].c);
  free (rct.v);

  return ok;
}

/* Parses a RECORD TYPE command into the static `rct' and, on
   success, appends a copy of it to the list of record types in
   `fty'.

   Before parsing, closes off the previous RECORD TYPE (if any) by
   recording its last transformation index and checking that it had
   at least one input command unless it was marked SKIP.

   Returns 1 on success.  On failure, reports the problem and returns
   0 or the value returned by msg() or syntax_error(); the caller is
   then responsible for freeing rct.v. */
static int
internal_cmd_record_type (void)
{
  /* Initialize the record_type structure. */
  rct.next = NULL;
  rct.flags = 0;
  if (fty.duplicate)
    rct.flags |= RCT_DUPLICATE;
  if (fty.missing)
    rct.flags |= RCT_MISSING;
  rct.v = NULL;
  rct.nv = 0;
  rct.ft = n_trns;
  if (fty.case_sbc.name[0])
    rct.case_sbc = fty.case_sbc;
#if __CHECKER__
  else
    memset (&rct.case_sbc, 0, sizeof (col_spec));
  rct.lt = -1;
#endif

  /* Make sure we're inside a FILE TYPE structure. */
  if (pgm_state != STATE_INPUT || vfm_source != &file_type_source)
    return msg (SE, _("This command may only appear within a "
                      "FILE TYPE/END FILE TYPE structure."));
  if (fty.recs_tail && (fty.recs_tail->flags & RCT_OTHER))
    return msg (SE, _("OTHER may appear only on the last RECORD "
                      "TYPE command."));
  if (fty.recs_tail)
    {
      /* The previous record type's FT was n_trns when it was parsed
         and its LT is n_trns - 1 now, so it received no
         transformations exactly when LT is less than FT. */
      fty.recs_tail->lt = n_trns - 1;
      if (!(fty.recs_tail->flags & RCT_SKIP)
          && fty.recs_tail->ft > fty.recs_tail->lt)
        return msg (SE, _("No input commands (DATA LIST, REPEATING DATA) "
                          "for above RECORD TYPE."));
    }

  match_id (RECORD);
  match_id (TYPE);

  /* Parse record type values. */
  if (match_id (OTHER))
    rct.flags |= RCT_OTHER;
  else
    {
      int mv = 0;		/* Allocated length of rct.v. */

      while (token == NUM || token == STRING)
        {
          /* Grow the vector 16 elements at a time. */
          if (rct.nv >= mv)
            {
              mv += 16;
              rct.v = xrealloc (rct.v, mv * sizeof (value));
            }

          if (formats[fty.record.fmt].cat & FCAT_STRING)
            {
              /* String record identifier: store a copy padded with
                 spaces to the width of the RECORD field. */
              force_string ();
              rct.v[rct.nv].c = xmalloc (fty.record.nc + 1);
              strbarepadcpy (rct.v[rct.nv++].c, tokstr, fty.record.nc + 1, ' ');
            }
          else
            {
              force_num ();
              rct.v[rct.nv++].f = tokval;
            }
          get_token ();

          /* Values may optionally be separated by commas. */
          match_tok (',');
        }
    }

  /* Parse the rest of the subcommands. */
  while (token != '.')
    {
      if (match_id (SKIP))
        rct.flags |= RCT_SKIP;
      else if (match_id (CASE))
        {
          if (fty.type == FTY_MIXED)
            return msg (SE, _("The CASE subcommand is not allowed on "
                        "the RECORD TYPE command for FILE TYPE MIXED."));
          match_tok ('=');
          /* An empty default name lets us detect below whether the
             user supplied a variable name. */
          if (!parse_col_spec (&rct.case_sbc, ""))
            return 0;
          if (rct.case_sbc.name[0])
            return msg (SE, _("No variable name may be specified for the "
                        "CASE subcommand on RECORD TYPE."));
          /* The two formats must agree on being string or not. */
          if ((formats[rct.case_sbc.fmt].cat ^ formats[fty.case_sbc.fmt].cat)
              & FCAT_STRING)
            return msg (SE, _("The CASE column specification on RECORD TYPE "
                        "must give a format specifier that is the "
                        "same type as that of the CASE column "
                        "specification given on FILE TYPE."));
        }
      else if (match_id (DUPLICATE))
        {
          match_tok ('=');
          if (match_id (WARN))
            rct.flags |= RCT_DUPLICATE;
          else if (match_id (NOWARN))
            rct.flags &= ~RCT_DUPLICATE;
          else
            return msg (SE, _("WARN or NOWARN expected on DUPLICATE "
                        "subcommand."));
        }
      else if (match_id (MISSING))
        {
          match_tok ('=');
          if (match_id (WARN))
            rct.flags |= RCT_MISSING;
          else if (match_id (NOWARN))
            rct.flags &= ~RCT_MISSING;
          else
            return msg (SE, _("WARN or NOWARN expected on MISSING subcommand."));
        }
      else if (match_id (SPREAD))
        {
          match_tok ('=');
          if (match_id (YES))
            rct.flags |= RCT_SPREAD;
          else if (match_id (NO))
            rct.flags &= ~RCT_SPREAD;
          else
            return msg (SE, _("YES or NO expected on SPREAD subcommand."));
        }
      else
        return syntax_error (_("while expecting a valid subcommand"));
    }

  /* Append a copy of rct to the list.  The new node takes over
     ownership of rct.v. */
  if (fty.recs_head)
    fty.recs_tail = fty.recs_tail->next = xmalloc (sizeof (record_type));
  else
    fty.recs_head = fty.recs_tail = xmalloc (sizeof (record_type));
  memcpy (fty.recs_tail, &rct, sizeof (record_type));

  return 1;
}

/* END FILE TYPE. */

/* Parses the END FILE TYPE command, which closes a FILE TYPE
   structure.

   Closes off the last RECORD TYPE by recording its last
   transformation index, and checks that it had at least one input
   command unless it was marked SKIP.  On success sets f_trns to
   n_trns and returns 1, or -2 if unexpected tokens followed the
   command.  If the command appears outside a FILE TYPE structure,
   returns the value returned by msg().  On any other error, calls
   cond_fail(), discards the variables, and returns 0. */
int
cmd_end_file_type (void)
{
  int result = 1;

  if (pgm_state != STATE_INPUT || vfm_source != &file_type_source)
    return msg (SE, _("This command may only appear within a "
                "FILE TYPE/END FILE TYPE structure."));

  match_id (TYPE);
  if (token != '.')
    {
      syntax_error (_("expecting end of command"));
      result = -2;
    }

  if (fty.recs_tail)
    {
      /* The last record type's FT was n_trns when it was parsed and
         its LT is n_trns - 1 now, so it received no transformations
         exactly when LT is less than FT. */
      fty.recs_tail->lt = n_trns - 1;
      if (!(fty.recs_tail->flags & RCT_SKIP)
          && fty.recs_tail->ft > fty.recs_tail->lt)
        {
          msg (SE, _("No input commands (DATA LIST, REPEATING DATA) "
               "on above RECORD TYPE."));
          goto fail;
        }
    }
  else
    {
      msg (SE, _("No commands between FILE TYPE and END FILE TYPE."));
      goto fail;
    }

  f_trns = n_trns;

  return result;

fail:
  /* Come here on discovering catastrophic error. */
  cond_fail ();
  discard_variables ();
  return 0;
}

/* FILE TYPE runtime. */

/*static void read_from_file_type_mixed(void);
   static void read_from_file_type_grouped(void);
   static void read_from_file_type_nested(void); */

/* Reads any number of cases into temp_case and calls write_case() for
   each one.  Compare data-list.c:read_from_data_list. */
static void
file_type_source_read (void)
{
  char *line;
  int len;

  fmt_spec format;

  if (fty.handle != inline_file)
    dfm_push_cust (fty.handle);

  format.type = fty.record.fmt;
  format.w = fty.record.nc;
  format.d = 0;
  while (NULL != (line = dfm_get_record (fty.handle, &len)))
    {
      record_type *iter;
      value v;
      int i;

      if (formats[fty.record.fmt].cat & FCAT_STRING)
	{
	  v.c = temp_case->data[fty.record.v->fv].s;
	  parse_string_as_format (&line[fty.record.fc - 1], len, &format,
				  fty.record.fc, (value *) v.c);
	  for (iter = fty.recs_head; iter; iter = iter->next)
	    {
	      if (iter->flags & RCT_OTHER)
		goto found;
	      for (i = 0; i < iter->nv; i++)
		if (!memcmp (iter->v[i].c, v.c, fty.record.nc))
		  goto found;
	    }
	  if (fty.wild)
	    msg (SW, _("Unknown record type \"%.*s\"."), fty.record.nc, v.c);
	}
      else
	{
	  parse_string_as_format (&line[fty.record.fc - 1], len, &format,
				  fty.record.fc, &v);
	  memcpy (&temp_case->data[fty.record.v->fv].f, &v.f, sizeof (v.f));
	  for (iter = fty.recs_head; iter; iter = iter->next)
	    {
	      if (iter->flags & RCT_OTHER)
		goto found;
	      for (i = 0; i < iter->nv; i++)
		if (approx_eq (iter->v[i].f, v.f))
		  goto found;
	    }
	  if (fty.wild)
	    msg (SW, _("Unknown record type %g."), v.f);
	}
      dfm_fwd_record (fty.handle);
      continue;

    found:
      /* Arrive here if there is a matching record_type, which is in
         iter. */
      dfm_fwd_record (fty.handle);
    }

/*  switch(fty.type)
   {
   case FTY_MIXED: read_from_file_type_mixed(); break;
   case FTY_GROUPED: read_from_file_type_grouped(); break;
   case FTY_NESTED: read_from_file_type_nested(); break;
   default: assert(0);
   } */

  if (fty.handle != inline_file)
    pop_cust ();
}

static void
file_type_source_destroy_source (void)
{
  record_type *iter, *next;

  cancel_transformations ();
  for (iter = fty.recs_head; iter; iter = next)
    {
      next = iter->next;
      free (iter);
    }
}

/* Case stream for FILE TYPE.  cmd_file_type() installs it as
   vfm_source.  Only the second member (the read function), the fifth
   member (the destroy-source function), and the name are provided;
   the remaining members are NULL.
   NOTE(review): the meaning of the NULL members is defined by
   case_stream, which is declared elsewhere -- confirm there. */
case_stream file_type_source =
  {
    NULL,
    file_type_source_read,
    NULL,
    NULL,
    file_type_source_destroy_source,
    NULL,
    "FILE TYPE",
  };
