(*---------------------------------------------------------------------------*
  INTERFACE  cf_dfa.mli

  Copyright (c) 2002-2004, James H. Woodyatt
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions
  are met:

    Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.

    Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in
    the documentation and/or other materials provided with the
    distribution

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
  OF THE POSSIBILITY OF SUCH DAMAGE. 
 *---------------------------------------------------------------------------*)

(** Functional composition of lazy deterministic finite automata. *)

(** {6 Overview}

    This module implements operators for functional composition of lazy
    deterministic finite automata (DFA).  A lazy DFA is more efficient at
    recognizing regular grammars than a non-deterministic finite automaton,
    and the lazy evaluation amortizes the cost of compiling the state table
    so that it compares well to that of the NFA.
    
    The interface defined here is used as the underlying algorithm for the
    {!Cf_lexer} module.  It uses a functor that operates on a module defining
    the type of a symbol and the bidirectional mapping between symbols and
    integers (used for indexing in the state transition array).  The result
    of the functor is a module that contains operator functions for composing
    expressions and rules for automata that operate on streams of the input
    symbol type.
    
    {b Note}: a DFA can be remarkably inefficient compared to an NFA for
    certain classes of unusual grammars and unusual input.
*)

(** {6 Classes} *)

(** The class of cursors used by the lazy DFA parser.  It inherits from the
    basic parser cursor class, {!Cf_parser.cursor}, and defines a new method
    for handling errors.
*)

class ['i] cursor:
    int ->  (** The initial position, usually zero. *)
    object('self)
        inherit ['i] Cf_parser.cursor
        
        (** This method is invoked as [c#error n z] in a DFA when no rule
            matches the input stream [z] after [n] symbols.  The purpose is to
            give a derived class an opportunity to raise an exception rather
            than allow the parser to return without a match.
        *)
        method error: int -> ('i * 'self) Cf_seq.t -> unit
    end

(** {6 Module Types} *)

(** The type of the input module for the [Create(S: Symbol_T)] functor
    defined below.  It describes the symbol type and the bidirectional
    mapping between symbols and integers.
*)
module type Symbol_T = sig
    (** The symbol type *)
    type t
    
    (** The size of an array containing an element for every symbol. *)
    val size: int
    
    (** A function to map a symbol to its integer number.  Must return a
        non-negative value strictly less than [size], because the result is
        used as an index into an array containing [size] elements.  For every
        symbol [s], [of_int (to_int s)] must be equivalent to [s].
    *)
    val to_int: t -> int
    
    (** A function to map an integer number to its symbol.  This is the
        inverse of [to_int].
    *)
    val of_int: int -> t
end

(** The output of the [Create(S: Symbol_T)] functor, which is a module that
    can be used to compose deterministic finite automata which operate on
    symbols of the type specified.
*)
module type T = sig
    (** The module used as the input to the [Create(S: Symbol_T)] functor. *)
    module S: Symbol_T

    (** A deterministic finite automaton is a function of type
        {!Cf_parser.X.t} that recognizes a stream of input symbols woven with
        objects of a cursor class, and produces output according to the types
        of the rules used in the composition of the automaton.
    *)
    type ('c, 'x) t = ('c, S.t, 'x) Cf_parser.X.t constraint 'c = S.t #cursor
    
    (** The type of an expression in the regular grammar of an automaton. *)
    type expr_t
    
    (** The type of a rule for recognizing a sequence of symbols according to
        the regular grammar of an automaton and producing an output token.
    *)
    type ('c, 'x) rule_t constraint 'c = S.t #cursor

    (** The expression that matches the empty symbol sequence. *)
    val nil: expr_t

    (** Open this module to bring the composition operators into the current
        scope.
    *)
    module Op: sig
    
        (** Use [a $| b] to compose an expression that matches either [a] or
            [b] in the symbol stream.
        *)
        val ( $| ): expr_t -> expr_t -> expr_t
        
        (** Use [a $& b] to compose an expression that matches [a] followed by
            [b] in the symbol stream.
        *)
        val ( $& ): expr_t -> expr_t -> expr_t
        
        (** Use [!*a] to compose an expression that matches zero or more
            occurrences of [a] in the symbol stream.
        *)
        val ( !* ): expr_t -> expr_t
        
        (** Use [!+a] to compose an expression that matches one or more
            occurrences of [a] in the symbol stream.
        *)
        val ( !+ ): expr_t -> expr_t
        
        (** Use [!?a] to compose an expression that matches zero or one
            occurrence of [a] in the symbol stream.
        *)
        val ( !? ): expr_t -> expr_t

        (** Use [!:sym] to compose an expression that matches the symbol [sym]
            in the symbol stream.
        *)
        val ( !: ): S.t -> expr_t
        
        (** Use [!^f] to compose an expression that matches any symbol in the
            symbol stream for which applying the function [f] returns [true].
        *)
        val ( !^ ): (S.t -> bool) -> expr_t
        
        (** Use [!~z] to compose an expression that matches the sequence of
            symbols [z] in the symbol stream.
        *)
        val ( !~ ): S.t Cf_seq.t -> expr_t

        (** Use [e $= x] to compose a rule that produces [x] when the symbols
            in the symbol stream match the expression [e].
        *)
        val ( $= ): expr_t -> 'x -> ('c, 'x) rule_t
        
        (** Use [e $> f] to compose a rule that applies the tokenizer function
            [f] to the sequence of symbols in the symbol stream recognized by
            the expression [e] to produce an output token.
        *)
        val ( $> ): expr_t -> (S.t Cf_seq.t -> 'x) -> ('c, 'x) rule_t
        
        (** Use [e $@ f] to compose a rule that applies the scanning function
            [f] to the input symbol stream when it is recognized by the
            expression [e].  The scanning function is passed the length of the
            recognized sequence of symbols, and it returns a parser that
            produces the output of the rule and makes any advanced
            manipulations of the input stream necessary to continue parsing
            for the next token.  If the parser returned from the scanning
            function does not recognize the input stream, then the rule is
            not matched and the next best matching rule is selected.
        *)
        val ( $@ ): expr_t -> (int -> ('c, 'x) t) -> ('c, 'x) rule_t
        
        (** Use [!@rules] to combine the list of rules [rules] into a single
            rule.
        *)
        val ( !@ ): ('c, 'x) rule_t list -> ('c, 'x) rule_t
    end
    
    (** Use [create r] to compose a new DFA from the rule [r]. *)
    val create: ('c, 'x) rule_t -> ('c, 'x) t
end

(** The functor that creates a DFA module.  The result has the signature
    {!T}, and its submodule [S] is the same module as the functor argument
    [S], so the symbol type of the result is the symbol type supplied.
*)
module Create(S: Symbol_T): T with module S = S

(*--- End of File [ cf_dfa.mli ] ---*)
