/** @defgroup automaton */
// @{

#ifndef __AUTOMATON_H__
#define __AUTOMATON_H__

//STL
#include <vector>
#include <queue>
#include <stack>
#include <functional>
#include <map>
//#include <list>
#include <ext/slist>
using __gnu_cxx::slist;
//STD
#include <iostream>
#include <cstdio>
#include <cstdlib>
#include <cmath>
#include <string>
//STR
using namespace std;
#include "macro.h"
#include "seed.h"


/**
 * @class transition
 *	
 * @brief transition to a given state (on a given letter)
 */

class transition {
 public:
  /// number of the state reached when this transition is taken
  int     state;
  /// probability attached to this transition (1.0 for a deterministic automaton)
  double  prob;

  /** @brief build a transition object
   *  @param state number of the state reached by this transition (0 by default)
   *  @param prob  probability of this transition (1.0 by default)
   */
  transition(int state = 0, double prob = 1.0)
    : state(state),
      prob(prob)
  {
  }
};

/**
 * @class state
 *	
 * @brief state represented as a table of list of transitions ("next[gv_align_alphabet_size]") + a table of lists of back transitions ("prev[gv_align_alphabet_size]") 
 *
 */

class state {
 public:
  /// Build an empty state (empty transition lists)
 state(int final = 0): final(final) {
    next  =  vector< slist < transition > >(gv_align_alphabet_size,slist < transition > ());
    prev  =  vector< slist < transition > >(gv_align_alphabet_size,slist < transition > ());
  };
  

  /// Erase a state (clear transition lists first)
  ~state() {
    for(int a = 0 ; a < gv_align_alphabet_size ; a++){
      next[a].clear();
      prev[a].clear();
    }
    next.clear();
    prev.clear();
  };
  
  /// Clear transition lists
  void clear() {
    for(int a = 0 ; a < gv_align_alphabet_size ; a++){
      next[a].clear();
      prev[a].clear();
    }
    next.clear();
    prev.clear();
  };


  /// final state
  int final;

  /** @brief give the next state  @f$ \psi(current,a)  @f$
   *  @param a is the choosen transition letter 
   *  @return a reference to the list of transition from this state
   */
  slist < transition > &  nextState(int a)      { return next[a];} 
  
  /** @brief return a list of previous states
   *  @param a is the choosen transition letter 
   *  @return a reference to the list of transition which leads to this state
   */
  slist < transition > &  previousStates(int a) { return prev[a];}
      
  /// forward  transitions list on letter A
  vector < slist < transition > >  next;
  /// backward transitions list on letter A
  vector < slist < transition > >  prev;

};




/**
 * @class automaton
 *
 * @brief automaton (deterministic or non-deterministic, probabilistic or not) roughly represented as a vector of states
 *
 *  an automaton object can represent one of the following:
 *	@li a seed automaton  (deterministic, initial state is 1, final states have the final tag)
 *	@li a probability model (Bernoulli, Markov, and old M3,M14,HMM models, can either be deterministic or non-deterministic)
 *	@li a target alignment model (deterministic)
 *
 */

class automaton {
 public:
  
  /// constructor for an empty automaton
  automaton() { _states = vector<state>(0);};

  /// destructor for all possibles automata build by this class
  ~automaton() {
    for(int i = 0 ; i < (int)_states.size() ; i++){
      _states[i].clear();
    }
    _states.clear();
  };
  
  /** @name  Build automata 
   *  @brief each one builds a special automaton 
   */

  //@{
  
  /** @brief Old algorithm in @f$ \mathcal{O}(w 2^{s-w})@f$ for subset seed matching (use on index of size @f$ \mathcal{O}(w 2^{s-w})@f$ )
   *  @param s is the seed descriptor
   *  @param matchingmatrix is a boolean  matrix that gives for alignment letter "a", and seed letter "b", the matching with "matrix[a][b]".
   *  @param nomerge if final states dont have to be merged together into a single state (default behaviour is merging).
   */
  int Automaton_SeedPrefixesMatching_old(seed & s,
					 vector< vector <int> > & matchingmatrix,
					 bool nomerge=false);

  /** @brief New linear algorithm for subset seed matching (CIAA 2007)
   *  @param s is the seed descriptor
   *  @param matchingmatrix is a boolean  matrix that gives for alignment letter "a", and seed letter "b", the matching with "matrix[a][b]".
   *  @param nomerge if final states dont have to be merged together into a single state (default behaviour is merging).
   */
  int Automaton_SeedPrefixesMatching    (seed & s,
					 vector< vector <int> > & matchingmatrix,
					 bool nomerge=false);


  /** @brief Modified version that takes lossless costs into account
   *  @param s is the seed descriptor
   *  @param matchingmatrix is a boolean  matrix that gives for alignment letter "a", and seed letter "b", the matching with "matrix[a][b]".
   *  @param nomerge if final states dont have to be merged together into a single state (default behaviour is merging).
   *  @param costs gives a vector of costs for each align alphabet letter
   *  @param cost_threshold gives a cost threshold that must not be reached by any alignment.
   *  @see Automaton_SeedPrefixesMatching
   */

  int Automaton_SeedPrefixesMatchingCost (seed& s,
					  vector< vector <int> > & matchingmatrix,
					  bool nomerge,
					  vector<int> & costs,
					  int cost_threshold);

  
  /** @brief Aho-Corasick automaton used by J.Buhler
   *  @param s is the seed descriptor
   *  @param matchingmatrix is a boolean  matrix that gives for alignment letter "a", and seed letter "b", the matching with "matrix[a][b]".
   *  @param nomerge if final states dont have to be merged together into a single state (default behaviour is merging).
   */
  int Automaton_SeedBuhler(seed & s, 
			   vector< vector <int> > & matchingmatrix, 
			   bool nomerge=false);
  
  /** @brief Aho-Corasick automaton with scoring method to prune prefixes 
   *  @param s is the seed descriptor
   *  @param matchingmatrix is a boolean  matrix that gives for alignment letter "a", and seed letter "b", the matching with "matrix[a][b]".
   *  @param scoringmatrix is an integer matrix that gives for alignment letter "a", and seed letter "b", the score    with "matrix[a][b]".
   *  @param scoringthreehold is the minimal score that has to be reached to have a match for the seed.
   *  @param nomerge if final states dont have to be merged together into a single state (default behaviour is merging).
   */

  int Automaton_SeedScore    (seed & s,  
			      vector< vector <int> > & matchingmatrix, 
			      vector< vector<int> >  & scoringmatrix, 
			      int scoringthreehold,
			      bool nomerge=false);

  /** @brief Modified version that takes lossless costs into account
   *  @param s is the seed descriptor
   *  @param matchingmatrix is a boolean  matrix that gives for alignment letter "a", and seed letter "b", the matching with "matrix[a][b]".
   *  @param scoringmatrix is an integer matrix that gives for alignment letter "a", and seed letter "b", the score    with "matrix[a][b]".
   *  @param scoringthreehold is the minimal score that has to be reached to have a match for the seed.
   *  @param nomerge if final states dont have to be merged together into a single state (default behaviour is merging).
   *  @param costs gives a vector of costs for each align alphabet letter
   *  @param cost_threshold gives a cost threshold that must not be reached by any alignment.
   *  @see Automaton_SeedScore
   */

  int Automaton_SeedScoreCost(seed & s, 
			      vector< vector <int> > & matchingmatrix, 
			      vector< vector<int> > & scoringmatrix, 
			      int scoringthreehold, 
			      bool nomerge, 
			      vector<int> & costs, 
			      int cost_threshold);

   
  /** @brief build a probabilistic bernoulli model
   *  @param p gives the probability of each letter of A (sum must be equal to 1.0).
   */
  int Automaton_Bernoulli(vector<double> & p);

  /** @brief build a probabilistic markov model of order k
   *  @param p gives the probability of each word of A^k (sum must be equal to 1.0).
   *  @param k gives the order.
   */
  int Automaton_Markov(vector<double> & p, int k);
  
  /** @brief build a Brejova M3 probabilistic model 
   *  @param p gives probabilities for 3 bernoulli models, each 
   */
  int Automaton_BrejovaM3(vector< vector<double> > & p /*[3][gv_align_alphabet_size]*/);

  /** @brief build a Brejova M13 (equivalent to  M8) probabilistic model 
   *  @brief this model is different from a Markov since its synchronized
   *  @param p gives the probability of each word from A^3 
   */
  int Automaton_BrejovaM13(vector<double> & p /*[gv_align_alphabet_size*gv_align_alphabet_size*gv_align_alphabet_size]*/);

   /** @brief build a HMM automaton (can be huge, you're warned ...)
    *  @param ptable gives several ("nbq" more exactly) M13 models.
    *  @param qtable gives HMM state transition matrix (size "nbq" x "nbq").
    *  @param nbq    indicates the number of hidden state.
    */
  int Automaton_HMM(vector< vector<double> > & ptable /* [gv_align_alphabet_size*gv_align_alphabet_size*gv_align_alphabet_size][nbq] */, vector<vector <double> > & qtable /* [nbHstates][nbHstates] */, int nbq);


  /** @brief build an Homogeneous sequence automaton. 
   *  @details It represents an alignment such that no substring of the alignment has a score greater than the full alignment
   *  @param scores gives a table of scores for each of the A letters.
   *  @param length gives the alignment length.
   */
  int Automaton_Homogeneous(vector<int> & scores,
			    int length);
 

  /** @brief build a cyclic automaton 
   *  @param cycle gives the cycle size
   *  @param final_list gives a table of positions that are final ( 0 <= final < cycle)
   *  @param final_nb   gives the size of the previous table
   */
  int Automaton_Cycle(int cycle, int * final_list, int final_nb);
  //@}





  /** @name  Manipulate automata 
   *  @brief reduction/product/and misc probabilities
   */
  //@{
  

  /** @brief Hopcroft automaton minimization 
   *  @return the minimized automaton, does not affect the current automaton
   *  @todo{FIXME : set a compatible algorithm for "final" integer states (init at : final=0/1/2 and cross ?? )}
   */
  automaton * Hopcroft();

  /** @brief Isomorphism check
   *  @param  other is the automaton to be compared to
   *  @return true if the two automata are isomorph
   */
  bool       isIsomorphTo(automaton & other);
    
  /** @brief Automata product 
   *  @param other is the second automaton used for the product
   *  @param crossFinalStates indicates if product final states are the crossproduct of both automaton final states or only one of these
   *       @li PRODUCT_UNION        : automata "union" of final states, 
   *       @li PRODUCT_INTERSECTION : automata "intersection" of final states
   *       @li PRODUCT_BUTNOT       : automata "this".final BUT NOT "other".final
   *          with
   *       @li LOOP / NO_LOOP        : indicates if the final state is a single one absorbant (force it, otherwise keep it as in the "true" product)
   *  @param otherIsProbabilist indicates that the second automaton represents a probabilistic model (false by default)
   *  @param depth indicates the maximal depth that must be reached : extra states are non final selflooping states
   *         (by default, this value is greater than 2 Billions, but the given alignment length should be enought in most cases)
   *  @return a new automaton that only gives reachable states of the product.
   */

  automaton * product(automaton & other, int crossFinalStates = 0, bool otherIsProbabilist = 0, int depth = 0x7fffffff);

#define PRODUCT_UNION_FINAL_LOOP        (0)
#define PRODUCT_INTERSECTION_FINAL_LOOP (1)
#define PRODUCT_BUTNOT_FINAL_LOOP       (2)

#define PRODUCT_UNION_NO_FINAL_LOOP        (10)
#define PRODUCT_INTERSECTION_NO_FINAL_LOOP (11)
#define PRODUCT_BUTNOT_NO_FINAL_LOOP       (12)

#define PRODUCT_UNION_NO_FINAL_LOOP_ADD    (20)


  /** @brief Compute the multiple hit automaton
   *  @param m is the number of hits needed to hit an alignment
   *  @return the automaton that take at least @f$m@f$ hits of the 
   *          original automaton to hit the alignment
   */
  automaton * mhit(unsigned int m, int depth);

  
  /** @brief Compute probability to be at a final state during the "nbSteps"th step
   *  @param nbSteps is the number of iterations done on the automaton to compute the probability (64 is usual ... so i use 64) 
   *  @return the probabilty to be at a final state at the "nbSteps"-th step 
   */
  double    PrFinal(int nbSteps = 64);


  /** @brief Gives the transition probability
   *  @param a is the transition letter
   *  @param startingState is the starting state
   *  @param endingState is the ending state
   *  @return either 0 if there is no link either > 0
   */
  double    Pr(int a, int startingState, int endingState);



  /** @brief Gives the probability to hit an alignment of the lossless set.
   *  @param nbSteps is the number of iterations done on the automaton to compute the probability
   *  @param costs is the vector of costs used on alignment alphabets
   *  @param cost_threshold is the maximal cost allowed
   *  @return the probabilty to hit such alignemnt (1.0 if lossless)
   *  @todo{FIXME : check if it does correct job for overlaps of 2 different seeds}
   *  @todo{FIXME : extends the algorithm for several hits of one/several seeds}
   *  @see Lossless 
   */  
  double PrLossless(int nbSteps, vector<int> costs, int cost_threshold);



  /** @brief Gives the lossless property of a seed (set of seeds)
   *  @param nbSteps is the number of iterations done on the automaton to compute the probability
   *  @param costs is the vector of costs used on alignment alphabets
   *  @param cost_threshold is the maximal cost allowed
   *  @param Nocc is the minimal number of seed occurences
   *  @return a boolean that indicates if the set of seeds is lossless (at least Nocc occurences everytime),
   *  @todo{FIXME : check if it does correct job for overlaps of 2 different seeds}
   *  @todo{FIXME : extends the algorithm for several hits of one/several seeds}
   *  @see PrLossless 
   */
  bool  Lossless(int nbSteps, vector<int> costs, int cost_threshold, int Nocc = 1);
    
  //@}




  /**@name  IO methods 
   * @brief IO streams to print/store/load automaton 
   */

  //@{

  /// print the graphviz/dot form of the automaton
  void dot(ostream& os);  
  /// print automaton information
  friend ostream& operator<<(ostream& os, automaton& automaton); 
  /// read  automaton information
  friend istream& operator>>(istream& is, automaton& automaton);
  //@}	

  /** @name  Miscellaneous
   *  @brief miscellaneous methods
   */ 

  //@{
  ///  return the size (number of states) of the current automaton
  inline int  size() { return _states.size(); }
  ///  make the state "stateNb" loop on itselft for any letter
  inline void selfLoop(int stateNb);
  ///  generate an alignment sequence of given length from the current probabilistic automaton
  void GenSeq(int len);
  //@}


 protected:
  /** @name automaton manipulation routines 
   */
  //@{
  /** @brief add a new state 
   *  @param final indicates if this is a final state
   */
  inline int addNewState(int final = 0);

  /** @brief add a new transition between two states
   *  @param a is the transition letter read
   *  @param startingState is the initial transition state number
   *  @param endingState is the final transition state number 
   *  @param prob is the probability assigned to the current transition
   */
  inline int addNewTransition(int a , int startingState , int endingState, double prob = (1.00/gv_align_alphabet_size));

  /** @brief change the transition probability
   *  @param "a" is the transition letter	
   *  @param startingState is the starting state
   *  @param endingState is the ending state
   *  @param prob is the new probability 
   *  @return 0
   */
  inline int changeTransitionProb( int a, int startingState, int endingState, double prob);

  
  /** @brief check if there is a transition from the startingState labeled with "a"
   *  @param a is the transition letter
   *  @param startingState is the starting state
   *  @return true if there is at least one transition
   */
  inline bool hasTransition( int a, int startingState);
  
  // @}
  
  /// vector of states currently assigned
  vector<state> _states; 
};
#endif

// @}
