#ifndef _PROFILE_H
#define _PROFILE_H
#include <stdio.h> 
#include <stdbool.h>
#include <string.h>
#include <xmmintrin.h>
#include <mm_malloc.h>
#include <alloca.h>

/* Letters a profile alphabet is drawn from, and their count. */
#define ALPHABET              "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
#define ALPHABET_SIZE 		26
#define TRANSITION_SIZE 	9
#define PROFILE_MAX_LINE_SIZE   512

/* Array dimensions used by struct Disjoint. */
#define KDIS                    2
#define KDPM                    2

/* Array dimensions used by the normalization structures (MAXN also sizes the cutoff arrays). */
#define MAXN                    8
#define KNOR                    3
#define KNPM                    5

/* Number of entries in struct CutOff. */
#define MAXC                    8

/*
 * Very low score that InitializeDefault() writes into a set of boundary and
 * transition slots. Parenthesized so the sign cannot bind to a neighbouring token.
 */
#define NLOW                    (-16383)

/************************** Insertion Matrices ************************
 * There are 3 different matrices for:
 *    - alphabet
 *    - boundaries
 *    - transitions
 */
/* Parenthesized so that expressions such as 2*_D evaluate as 2*(ALPHABET_SIZE+1). */
#define _D   (ALPHABET_SIZE+1)

/* BOUNDARIES: indices into one row of the Boundaries array (INSERTION_BOUNDARIES_SIZE shorts per row). */
#define _B0  0
#define _B1  1

#define _E0  2
#define _E1  3

#define _BM  4
#define _BI  5
#define _BD  6
#define _BE  7

#define _ME  8
#define _IE  9
#define _DE 10

#define INSERTION_BOUNDARIES_SIZE 11

/* TRANSITIONS */
/*
 * Slot numbering shared by every 4-tuple of scores and by the 16-entry
 * transition array (four 4-tuples, one per source state).
 *
 * WARNING (required for the SSE2 code to work):
 *   MATCH=2, INSERTION=3, DELETION=0, DUMMY=1
 *
 * Only the enumeration type is declared here. The previous form ended with
 * "} VectorPosition;", which defined a global variable in every translation
 * unit including this header.
 */
enum VectorPosition {
  /* Positions within both 4-tuple and array of 4-tuple */
  MATCH=2,
  INSERTION=3,
  DELETION=0,
  EXTRA=1,
  /* Position of empty space within 4-tuple */
  DUMMY=1,
  /* _XY = 4*X + Y: position of the transition from X to Y within the array of 4-tuples */
  _MM = 4*MATCH+MATCH,     _MI = 4*MATCH+INSERTION,     _MD = 4*MATCH+DELETION,     _MX    = 4*MATCH+EXTRA,
  _IM = 4*INSERTION+MATCH, _II = 4*INSERTION+INSERTION, _ID = 4*INSERTION+DELETION, _IX    = 4*INSERTION+EXTRA,
  _DM = 4*DELETION+MATCH,  _DI = 4*DELETION+INSERTION,  _DD = 4*DELETION+DELETION,  _DX    = 4*DELETION+EXTRA,
  _XM = 4*EXTRA+MATCH,     _XI = 4*EXTRA+INSERTION,     _XD = 4*EXTRA+DELETION,     _DUMMY = 4*EXTRA+EXTRA,
  /* Overall size of the transition structure */
  INSERTION_TRANSITIONS_SIZE = 16
};
////////////////////////////// FOR SSE4.1 to WORK //////////////////////////////////////////////
// WARNING: MATCH and INSERTION should be next to each other (in this ordering)
//          and either in the upper part of xmm or the lower part.
//          Do not mix them, and adapt the storing function below to the choice made above.
//
// StoreMatchInsertion writes the upper two 32-bit lanes of _reg to *_address.
// With MATCH=2 and INSERTION=3 those are exactly the MATCH and INSERTION lanes.
static inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 
StoreMatchInsertion( __m64 * const _address, const __m128 _reg)
{
    // A layout keeping MATCH/INSERTION in the lower half would need _mm_storel_pi here instead.
    //_mm_storel_pi(_address, _reg);
    _mm_storeh_pi(_address, _reg);
}

/*
 * One (M, I) pair viewed as two floats, two ints, or a single __m64.
 * NOTE(review): M/I presumably stand for match/insertion, and this looks like
 * the destination type of StoreMatchInsertion() - confirm against the callers.
 */
typedef union {
    struct { float M; float I; } Elementf;
    struct { int M; int I; } Element;
    __m64 mm;
  } sIOP;
  
/*
 * Four short scores overlaid on one __m64.
 * To[] and From[] are two names for the same four slots; slots are addressed
 * with MATCH / INSERTION / DELETION / EXTRA from enum VectorPosition.
 */
typedef union ScoreTuple {
  short int To[4];
  short int From[4];
  __m64 mm;
} ScoreTuple;

/*
 * Transition scores of one profile position, viewed either as four tuples
 * (From[X].To[Y]) or as a flat array of INSERTION_TRANSITIONS_SIZE shorts
 * indexed with the _XY constants (_XY = 4*X + Y).
 */
typedef union {
  ScoreTuple From[4];
  short int Element[INSERTION_TRANSITIONS_SIZE];
} TransitionScores;

/*
 * Five pointers with the same names and order as the leading members of
 * struct SInsertion. Within this header it is only used as the padding member
 * `_dummy` of struct SMatch inside union Scores.
 */
typedef struct InsertionScores {
  short int * Alphabet;
  short int * Boundaries;
  TransitionScores * Transitions;
  ScoreTuple * FirstSequenceProtein;
  ScoreTuple * LastSequenceProtein;
} struct_InsertionScores;

/************************** Match Matrix   ************************/
/*
 * Single pointer to match scores. Within this header it is only used as the
 * padding member `_dummy` of struct SInsertion inside union Scores.
 */
typedef struct MatchScores {
  short int * alphabet;
} struct_MatchScores;


/*
 * Disjointness data of a profile: KDIS names of up to 15 characters plus
 * terminator, and integer arrays sized by KDIS and KDPM.
 * NOTE(review): the meaning of the individual fields is not visible in this
 * header - confirm against the profile reader.
 */
typedef struct Disjoint {
  char CDIS[KDIS][16];
  int  JDIP[KDIS];
  int  NDIP[KDPM];
  int  MDIS;
} SDisjoint;

/*
 * One normalization entry: KNPM float parameters, a 32-byte text field and
 * three integers.
 * NOTE(review): field semantics are not visible in this header - confirm
 * against the profile reader.
 */
typedef struct NormalizationItem {
  float RNOP[KNPM];
  char  CNTX[32];
  int   MNOR;
  int   NNOR;
  int   NNPR;
} SNormalizationItem;

/*
 * Normalization table of a profile: up to MAXN entries plus KNOR names and
 * KNOR integers.
 * NOTE(review): JNOR presumably counts the used entries of Values - confirm.
 */
typedef struct Normalization {
  SNormalizationItem Values[MAXN];
  char  CNOR[KNOR][16];
  int   JNOP[KNOR];
  int   JNOR;
} SNormalization;

/*
 * Float buffer with an associated size, handled by InitAverage(),
 * FreeAverage() and ComputeAverageFrequencies().
 * NOTE(review): presumably `size` is the element count of Weights - confirm.
 */
typedef struct Average {
  float * Weights;
  size_t size;
} SAverage;

/* Pointer to a normalized-to-raw conversion; same signature as N2R_1, N2R_2 and N2R_3. */
typedef int (*NormalizedToRawFunctionPtr)(const float, const size_t, const float);
/* Pointer to a raw-to-normalized conversion; same signature as R2N_1, R2N_2 and R2N_3. */
typedef float (*RawToNormalizedFunctionPtr)(const int, const size_t, const float);

/*
 * One cutoff entry: a 32-byte text field, MAXN float and MAXN int values,
 * and the filter / heuristic cutoffs.
 * NOTE(review): the meaning of the remaining fields is not visible in this
 * header - confirm against the profile reader.
 */
typedef struct CutOffItem {
  char  CCUT[32];
  float RCUT[MAXN];
  int   MCUT[MAXN];
  int   MCLE;
  int   ICUT;			// Filter cutoff
  unsigned int	HCUT;		// Heuristic cutoff
  int   JCNM;
} SCutOffItem;

/*
 * Cutoff table of a profile: up to MAXC entries.
 * NOTE(review): JCUT presumably counts the used entries of Values - confirm.
 */
typedef struct CutOff {
  SCutOffItem Values[MAXC];
  int JCUT;
} SCutOff;

/*
 * In-memory representation of one profile: textual header fields, alphabet
 * tables, score matrices and the normalization / disjoint / cutoff data.
 */
struct Profile {
  /* Header text fields, each aligned on 16 bytes */
  char Identification[64] __attribute__((aligned(16)));
  char AC_Number[64] __attribute__((aligned(16)));
  char Date[128] __attribute__((aligned(16)));
  char Description[256] __attribute__((aligned(16)));
  /* NOTE(review): presumably the 'A'..'Z' -> index table handed to TranslateCharToIndex() - confirm */
  unsigned char Alphabet_Mapping[ALPHABET_SIZE+2] __attribute__((aligned(16)));
  /* Letters of the profile alphabet; WeightMatchMatrix() reads CABC[alpha] as the letter of index alpha */
  char CABC[ALPHABET_SIZE+2]; 
  size_t Length;
  size_t Alphabet_Length;
  
  /*
   * Two views of the same storage. Both structures place AlignStep after
   * five pointers, so Insertion.AlignStep and Match.AlignStep are the same
   * field, and Match.Alphabet occupies the slot of Insertion._dummy.
   */
  union Scores {
    struct SInsertion {
      short int * Alphabet;
      short int * Boundaries;
      TransitionScores * Transitions;
      ScoreTuple * FirstSequenceProtein;
      ScoreTuple * LastSequenceProtein;
      /* Number of short ints between two consecutive rows of the Alphabet matrices */
      size_t AlignStep;
      struct_MatchScores _dummy;
    } Insertion;
    struct SMatch {
      struct_InsertionScores _dummy;
      size_t AlignStep;
      short int * Alphabet;
    } Match;
  } Scores;

  _Bool isCircular;
  SNormalization NormalizationData;
  SDisjoint DisjointData;
  SCutOff CutOffData;
  size_t Level;		/* WARNING: This is not the real level but the index corresponding within the cutoff array */
  size_t Mode;          /* WARNING: This is not the real mode but the index corresponding within the normalization array */
};

/*
 * Sequence buffer and its length in bytes.
 * TranslateSequenceToIndex() rewrites ProfileIndex in place from letters to
 * alphabet indices and shrinks Length to the number of letters kept.
 */
typedef struct PFSequence {
  unsigned char * ProfileIndex;
  size_t Length;
} PFSequence;

/******************* XALIP & XALIT structures *******************/
/*
 * Four ints overlaid on one __m128i; `dummy` fills the fourth lane.
 * NOTE(review): the meaning of One / Two / B is not visible in this header.
 */
union Positions {
  struct {
    int One;
    int Two;
    int B;
    int dummy;
  } Element;
  __m128i xmm;
};

/*
 * Four scores viewed as ints (element-wise or as __m128i) or as floats
 * (element-wise or as __m128), depending on the instruction set in use.
 */
union lScores {
  /* SSE 4.1 can work on integer */
  int Element[4];
  __m128i xmm;
  /* Other have to rely upon float */
  float Elementf[4];
  __m128 xmmf;
};

/*
 * Seven integers describing one alignment, passed to the xalip / xalit
 * routines and to the Print* functions.
 * NOTE(review): the meaning of each field is not visible in this header.
 */
struct Alignment {
   int JALS,JALB,JAL1,JAL2,JALE,IPMB,IPME;
};


/******************* Global variables *******************/
/* Declarations only: each of these objects must be defined once in a source file. */
extern SNormalizationItem * Normalization;
extern NormalizedToRawFunctionPtr NormalizedToRawFunction;
extern RawToNormalizedFunctionPtr RawToNormalizedFunction;
extern SAverage * Average;
extern unsigned int OutputPrintWidth;
extern int SearchLevel;

/****************** Inline functions *******************/

static inline void InitializeDefault(union Scores * const matrices, char * const MatchSymbol,
				     char * const InsertionSymbol )
{
  /* MATCHES */
  memset(matrices->Match.Alphabet, 0, (ALPHABET_SIZE+2)*sizeof(short int));
  /* INSERTIONS */   
  memset(matrices->Insertion.Alphabet,    0, (ALPHABET_SIZE+2)*sizeof(short int));
  memset(matrices->Insertion.Boundaries,  0, (INSERTION_BOUNDARIES_SIZE)*sizeof(short int));
  memset(matrices->Insertion.Transitions, 0, sizeof(TransitionScores));

  matrices->Insertion.Boundaries[_BI] = NLOW;
  matrices->Insertion.Boundaries[_BD] = NLOW;
  matrices->Insertion.Boundaries[_BE] = NLOW;
  matrices->Insertion.Boundaries[_DE] = NLOW;
  matrices->Insertion.Boundaries[_IE] = NLOW;

  matrices->Insertion.Transitions->Element[_MI] = NLOW;
  matrices->Insertion.Transitions->Element[_MD] = NLOW;
  matrices->Insertion.Transitions->Element[_ID] = NLOW;
  matrices->Insertion.Transitions->Element[_IM] = NLOW;
  matrices->Insertion.Transitions->Element[_DM] = NLOW;
  matrices->Insertion.Transitions->Element[_DI] = NLOW;
  
  *MatchSymbol = 'X';
  *InsertionSymbol = '-';
}

static inline size_t AllocateScores(union Scores * const matrices, const size_t Alphabet_Length, const size_t Profile_Length)
{
  register void * ptr;
  const size_t Aligned_Alphabet_Length = (Alphabet_Length+2 + 15) & ~15 ;
  matrices->Insertion.AlignStep = Aligned_Alphabet_Length;

  ptr =  _mm_malloc(Aligned_Alphabet_Length*(Profile_Length+1)*sizeof(short int), 64);
  if (ptr == NULL) return 1;
  matrices->Match.Alphabet = (short int *) ptr;

  ptr =  _mm_malloc(Aligned_Alphabet_Length*(Profile_Length+1)*sizeof(short int), 64);
  if (ptr == NULL) return 1;
  matrices->Insertion.Alphabet = (short int *) ptr;

  ptr =  _mm_malloc(INSERTION_BOUNDARIES_SIZE*(Profile_Length+1)*sizeof(short int), 64);
  if (ptr == NULL) return 1;
  matrices->Insertion.Boundaries  = (short int *) ptr;

  ptr =  _mm_malloc((Profile_Length+1)*sizeof(TransitionScores), 64);
   if (ptr == NULL) return 1;
  matrices->Insertion.Transitions = (TransitionScores *) ptr;

  ptr =  _mm_malloc((Profile_Length+1)*sizeof(ScoreTuple), 64);
  if (ptr == NULL) return 1;
  matrices->Insertion.FirstSequenceProtein = (ScoreTuple *) ptr;
  
  ptr =  _mm_malloc((Profile_Length+1)*sizeof(ScoreTuple), 64);
  if (ptr == NULL) return 1;
  matrices->Insertion.LastSequenceProtein = (ScoreTuple *) ptr;

  return 0;
}

/*
 * Map one character to its alphabet index.
 *
 * letter   : character to translate; bytes from 'a' upwards are shifted down
 *            by 'a'-'A' before the range test.
 * Alphabet : table with one entry per letter 'A'..'Z'.
 *
 * Returns Alphabet[letter - 'A'] for a letter, 0 for anything else.
 */
static unsigned char TranslateCharToIndex(const char letter, const unsigned char * restrict const Alphabet)
{
  unsigned char code = (unsigned char) letter;

  if (code >= (unsigned char) 'a') {
    code = (unsigned char) (code - ((unsigned char) 'a' - (unsigned char) 'A'));
  }

  if (code < (unsigned char) 'A' || code > (unsigned char) 'Z') {
    return 0;
  }
  return Alphabet[(size_t) code - (size_t) 'A'];
}

/*
 * Translate a sequence in place from letters to alphabet indices.
 *
 * Sequence : buffer and length; letters are replaced by Alphabet[letter - 'A']
 *            (case-insensitively), every other byte is dropped, and Length is
 *            updated to the number of bytes kept.
 * Alphabet : table with one entry per letter 'A'..'Z'.
 *
 * Returns Sequence.
 */
static PFSequence * TranslateSequenceToIndex(PFSequence * const Sequence, const unsigned char * restrict const Alphabet)
{
  unsigned char * const buffer = Sequence->ProfileIndex;
  const size_t inputLength = Sequence->Length;
  size_t kept = 0;

  /* The write cursor never overtakes the read cursor, so compacting in place is safe */
  for (size_t pos = 0; pos < inputLength; ++pos) {
    unsigned char symbol = buffer[pos];
    if (symbol >= (unsigned char) 'a') {
      symbol = (unsigned char) (symbol - ((unsigned char) 'a' - (unsigned char) 'A'));
    }
    if (symbol < (unsigned char) 'A' || symbol > (unsigned char) 'Z') {
      continue;
    }
    buffer[kept] = Alphabet[(size_t) symbol - (size_t) 'A'];
    ++kept;
  }

  Sequence->Length = kept;
  return Sequence;
}

#ifndef _SPECIAL_HEURISTIC_
/*
 * Build a transposed int copy of the match matrix.
 *
 * The result holds Alphabet_Length rows of Aligned_Profile_Length ints, where
 * Aligned_Profile_Length is Profile_Length+1 rounded up to a multiple of 16;
 * entry [alpha][iprf] is the match score of letter alpha at position iprf and
 * unused entries are zero.
 *
 * Returns a 64-byte aligned buffer obtained from _mm_malloc (the caller
 * releases it with _mm_free), or NULL when the allocation fails.
 */
static inline const int * TransposeAndConvertMatchMatrix(const union Scores * const Matrices, const size_t Alphabet_Length,
                                                         const size_t Profile_Length)
{
  const size_t rowStride = Matrices->Match.AlignStep;
  /* Profile size rounded to cache line */
  const size_t columns = (Profile_Length+1 + 15) & ~15;
  const size_t byteCount = columns*Alphabet_Length*sizeof(int);
  int * restrict const transposed = _mm_malloc(byteCount,64);

  if (transposed != NULL) {
    memset(transposed, 0, byteCount);

    const short * const source = Matrices->Match.Alphabet;
    for (size_t iprf = 0; iprf < Profile_Length; ++iprf) {
      const short * const row = source + iprf*rowStride;
      for (size_t alpha = 0; alpha < Alphabet_Length; ++alpha) {
        transposed[alpha*columns + iprf] = (int) row[alpha];
      }
    }
  }
  return transposed;
}

/*
 * Same transposition as TransposeAndConvertMatchMatrix, written into memory
 * supplied by the caller.
 *
 * TIMatch                : destination, at least
 *                          Aligned_Profile_Length*Alphabet_Length ints; it is
 *                          zeroed before being filled.
 * Aligned_Profile_Length : distance, in ints, between two destination rows.
 */
static inline void TransposeAndConvertMatchMatrixGivenMemory(int * const restrict TIMatch, const union Scores * const Matrices,
							     const size_t Alphabet_Length, const size_t Profile_Length,
							     const size_t Aligned_Profile_Length)
{
  const size_t rowStride = Matrices->Match.AlignStep;

  memset(TIMatch, 0, Aligned_Profile_Length*Alphabet_Length*sizeof(int));

  const short * row = Matrices->Match.Alphabet;
  for (size_t iprf = 0; iprf < Profile_Length; ++iprf, row += rowStride) {
    size_t destination = iprf;
    for (size_t alpha = 0; alpha < Alphabet_Length; ++alpha, destination += Aligned_Profile_Length) {
      TIMatch[destination] = (int) row[alpha];
    }
  }
}

/*
 * Build a transposed float copy of the match matrix.
 *
 * The result holds Alphabet_Length rows of Aligned_Profile_Length floats,
 * where Aligned_Profile_Length is Profile_Length+1 rounded up to a multiple
 * of 16; unused entries are zero.
 *
 * Returns a 64-byte aligned buffer obtained from _mm_malloc (the caller
 * releases it with _mm_free), or NULL when the allocation fails.
 */
static inline float * TransposeAndConvertToFloatMatchMatrix(const union Scores * const Matrices, const size_t Alphabet_Length,
                                                            const size_t Profile_Length)
{
  /* Profile size rounded to cache line boundary */
  const size_t columns = (Profile_Length+1 + 15) & ~15;
  const size_t byteCount = columns*Alphabet_Length*sizeof(float);
  float * restrict const transposed = _mm_malloc(byteCount,64);

  if (transposed == NULL) return transposed;
  memset(transposed, 0, byteCount);

  const size_t rowStride = Matrices->Match.AlignStep;
  const short * row = Matrices->Match.Alphabet;
  size_t iprf = 0;
  while (iprf < Profile_Length) {
    for (size_t alpha = 0; alpha < Alphabet_Length; ++alpha) {
      transposed[alpha*columns + iprf] = (float) row[alpha];
    }
    row += rowStride;
    ++iprf;
  }
  return transposed;
}

/*
 * Transpose the match matrix into caller-supplied float memory.
 *
 * TIMatch                : destination, at least
 *                          Aligned_Profile_Length*Alphabet_Length floats; it
 *                          is zeroed before being filled.
 * Matrices               : source scores; Match.AlignStep is the distance, in
 *                          shorts, between two source rows.
 * Aligned_Profile_Length : distance, in floats, between two destination rows.
 *
 * The clear is sized from the destination element type; it used to be sized
 * with sizeof(int), unlike the _SPECIAL_HEURISTIC_ build of this function.
 */
static inline void TransposeAndConvertToFloatMatchMatrixGivenMemory(float * const restrict TIMatch, const union Scores * const Matrices,
							            const size_t Alphabet_Length, const size_t Profile_Length,
							            const size_t Aligned_Profile_Length)
{
  const size_t step = Matrices->Match.AlignStep;

  memset(TIMatch, 0, Aligned_Profile_Length*Alphabet_Length*sizeof *TIMatch);
  
  const short * restrict lMatch = Matrices->Match.Alphabet;
  for (size_t iprf = 0; iprf<Profile_Length; ++iprf) {
    for (size_t alpha=0; alpha <Alphabet_Length; ++alpha) {
      TIMatch[alpha*Aligned_Profile_Length+iprf] = (float) lMatch[alpha];
    }
    lMatch += step;
  }
}
#else
/* 
 * These are special heuristic functions in the sense that they limit the negative effect of a wrong match,
 * replacing it where beneficial by either an insertion or deletion score.
 * 
 */

/*
 * CHANGE(type): shared body of the heuristic transposition routines.
 *
 * Expects these names in the expanding scope: Matrices, Alphabet_Length,
 * Profile_Length, Aligned_Profile_Length, step and TIMatch (already zeroed).
 * The first and last profile rows are copied unchanged. For every row in
 * between, each match score is raised to at least the larger of the
 * match->deletion->match and match->insertion->match transition sums.
 *
 * The loop bounds are written so that a profile of length 0 or 1 neither
 * wraps around (Profile_Length is unsigned) nor overwrites the first row with
 * the row that follows it.
 */
#define CHANGE(type) \
  const register short * restrict lMatch = Matrices->Match.Alphabet;\
  const TransitionScores * restrict InsertionLine = Matrices->Insertion.Transitions;\
  \
  /* First line stays identical */\
  for (size_t alpha=0; alpha <Alphabet_Length; ++alpha) {\
    TIMatch[alpha*Aligned_Profile_Length] = (type) lMatch[alpha];\
  }\
  lMatch += step;\
  /* Rows 1 to n-2 get the special treatment; iprf+1<n cannot wrap when n is 0 */\
  for (size_t iprf = 1; iprf+1<Profile_Length; ++iprf) {\
    const short int MDDM    = InsertionLine[iprf].From[MATCH].To[DELETION]+InsertionLine[iprf+1].From[DELETION].To[MATCH];\
    const short int MIIM    = InsertionLine[iprf].From[MATCH].To[INSERTION]+InsertionLine[iprf+1].From[INSERTION].To[MATCH];\
    /* Despite its name this is the larger of the two sums, used as a lower bound */\
    const short int Minimum = (MDDM>MIIM) ? MDDM : MIIM;\
    \
    for (size_t alpha=0; alpha <Alphabet_Length; ++alpha) {\
      const short int value = lMatch[alpha] < Minimum ? Minimum : lMatch[alpha];\
      TIMatch[alpha*Aligned_Profile_Length+iprf] = (type) value;\
    }\
    lMatch += step;\
  }\
  /* Last line: a distinct row only when the profile has at least two rows */\
  if (Profile_Length > 1) {\
    for (size_t alpha=0; alpha <Alphabet_Length; ++alpha) {\
      TIMatch[alpha*Aligned_Profile_Length+Profile_Length-1] = (type) lMatch[alpha];\
    }\
  }
  
/*
 * Heuristic build: allocate a zeroed, transposed int matrix and fill it with
 * the CHANGE macro.
 *
 * Returns a 64-byte aligned buffer of Aligned_Profile_Length*Alphabet_Length
 * ints obtained from _mm_malloc, or NULL when the allocation fails.
 * The locals step, Aligned_Profile_Length and TIMatch are consumed by CHANGE
 * and must keep these names.
 */
static inline const int * TransposeAndConvertMatchMatrix(const union Scores * const Matrices, const size_t Alphabet_Length,
                                                         const size_t Profile_Length)
{
  const size_t step = Matrices->Match.AlignStep;
  /* Profile size rounded to cache line */
  const size_t Aligned_Profile_Length = (Profile_Length+1 + 15) & ~15;
  const size_t ByteCount = Aligned_Profile_Length*Alphabet_Length*sizeof(int);
  int * restrict const TIMatch = _mm_malloc(ByteCount,64);

  if (TIMatch != NULL) {
    memset(TIMatch, 0, ByteCount);
    CHANGE(int);
  }

  return TIMatch;
}

/*
 * Heuristic build: zero the caller-supplied int matrix and fill it with the
 * CHANGE macro.
 *
 * TIMatch must hold at least Aligned_Profile_Length*Alphabet_Length ints.
 * The local `step` is consumed by CHANGE and must keep this name.
 */
static inline void TransposeAndConvertMatchMatrixGivenMemory(int * const restrict TIMatch, const union Scores * const Matrices,
							     const size_t Alphabet_Length, const size_t Profile_Length,
							     const size_t Aligned_Profile_Length)
{
  const size_t ByteCount = Aligned_Profile_Length*Alphabet_Length*sizeof(int);
  const size_t step = Matrices->Match.AlignStep;

  memset(TIMatch, 0, ByteCount);

  CHANGE(int);
}

/*
 * Heuristic build: allocate a zeroed, transposed float matrix and fill it
 * with the CHANGE macro.
 *
 * Returns a 64-byte aligned buffer of Aligned_Profile_Length*Alphabet_Length
 * floats obtained from _mm_malloc, or NULL when the allocation fails.
 * The locals step, Aligned_Profile_Length and TIMatch are consumed by CHANGE
 * and must keep these names.
 */
static inline float * TransposeAndConvertToFloatMatchMatrix(const union Scores * const Matrices, const size_t Alphabet_Length,
							    const size_t Profile_Length)
{
  const size_t step = Matrices->Match.AlignStep;
  /* Profile size rounded to cache line */
  const size_t Aligned_Profile_Length = (Profile_Length+1 + 15) & ~15;
  const size_t ByteCount = Aligned_Profile_Length*Alphabet_Length*sizeof(float);
  float * restrict const TIMatch = _mm_malloc(ByteCount,64);

  if (TIMatch != NULL) {
    memset(TIMatch, 0, ByteCount);
    CHANGE(float);
  }

  return TIMatch;
}

/*
 * Heuristic build: zero the caller-supplied float matrix and fill it with the
 * CHANGE macro.
 *
 * TIMatch must hold at least Aligned_Profile_Length*Alphabet_Length floats.
 * The local `step` is consumed by CHANGE and must keep this name.
 */
static inline void TransposeAndConvertToFloatMatchMatrixGivenMemory(float * const restrict TIMatch, const union Scores * const Matrices,
							            const size_t Alphabet_Length, const size_t Profile_Length,
							            const size_t Aligned_Profile_Length)
{
  const size_t ByteCount = Aligned_Profile_Length*Alphabet_Length*sizeof(float);
  const size_t step = Matrices->Match.AlignStep;

  memset(TIMatch, 0, ByteCount);

  CHANGE(float);
}
#endif

/*
 * Rescale a transposed float match matrix in place.
 *
 * TIMatch         : Alphabet_Length rows of Aligned_Profile_Length floats,
 *                   where Aligned_Profile_Length is Profile_Length+1 rounded
 *                   up to a multiple of 16.
 * prf             : profile; only CABC (the letter of each alphabet index) is read.
 * Alphabet_Length : number of rows to process.
 * Profile_Length  : number of columns to process.
 *
 * Each entry [alpha][iprf] is multiplied by
 *   1 - weight(letter alpha) * (sum of column iprf) / (sum of the whole matrix).
 *
 * Terminates the program with exit(1) when CABC holds a character outside
 * 'A'..'Z' / 'a'..'z'. When the matrix sums to exactly zero the scaling is
 * undefined (division by zero), so the matrix is left untouched.
 *
 * Both work buffers live on the stack (alloca); their size grows with
 * Alphabet_Length and Profile_Length.
 */
static inline void WeightMatchMatrix(float * const restrict TIMatch, const struct Profile * prf,
				     const size_t Alphabet_Length, const size_t Profile_Length)
{
  /* One weight per letter 'A'..'Z'; constant, so kept out of the per-call stack frame */
  static const float DatabaseWeightedScores[26] = { 
    0.0783f, 0.0000f, 0.0236f, 0.0496f, 0.0581f, 0.0463f, 0.0749f, 0.0248f,
    0.0621f, 0.0000f, 0.0602f, 0.0939f, 0.0251f, 0.0421f, 0.0000f, 0.0426f,
    0.0360f, 0.0524f, 0.0612f, 0.0523f, 0.0000f, 0.0700f, 0.0132f, 0.0000f,
    0.0339f, 0.0000f
  };
  
  /* Database scaling factor of each letter of the profile alphabet */
  float * const restrict DatabaseFactor = (float*) alloca((Alphabet_Length)*sizeof(float));
  for (size_t alpha=0; alpha<Alphabet_Length; ++alpha) {
    const unsigned char letter = (unsigned char) prf->CABC[alpha];
    /* Characters below 'A' yield a negative int, i.e. a huge size_t, and are rejected below */
    const size_t index = (size_t) ( (letter >= (unsigned char) 'a' ) ? letter - ((unsigned char) 'a') : letter - ((unsigned char) 'A') );
    
    if ( index >= 26 ) {
      fprintf(stderr,"Database weight scores does not contain all alphabet letters, %1c is missing\n", prf->CABC[alpha]);
      exit(1);
    }
    DatabaseFactor[alpha] = DatabaseWeightedScores[index];
  }

  /* Alignment of profile on cache line (64 bytes) */
  const size_t Aligned_Profile_Length = (Profile_Length+1 + 15) & ~15;
  
  /* We allocate here on the stack, so beware */
  float * const restrict ProfileSum = (float*) alloca(Aligned_Profile_Length*sizeof(float));
  memset(ProfileSum, 0, Aligned_Profile_Length*sizeof(float));
  
  /* Compute the overall sum and the sum per profile position */
  float Sum = 0.0f;
  for (size_t alpha=0; alpha<Alphabet_Length; ++alpha) {
    float tSum = 0.0f; 
#pragma unroll(4)
    for (size_t iprf=0; iprf<Profile_Length; ++iprf) {
	ProfileSum[iprf] += TIMatch[alpha*Aligned_Profile_Length+iprf];
	tSum             += TIMatch[alpha*Aligned_Profile_Length+iprf];
    }
    Sum += tSum;
  }

  /* Nothing to normalize against: 1/Sum would turn every entry into Inf or NaN */
  if (Sum == 0.0f) return;
  
  /* Normalize the per-position sums */
  const float Norm = 1.0f/Sum;
  for (size_t iprf = 0; iprf<Profile_Length; ++iprf) ProfileSum[iprf] *= Norm;
  
  /* Apply the scaling, including the database frequency, to the Match matrix */
  for (size_t alpha=0; alpha<Alphabet_Length; ++alpha) {
    const float scale = DatabaseFactor[alpha];
    for (size_t iprf = 0; iprf<Profile_Length; ++iprf) TIMatch[alpha*Aligned_Profile_Length+iprf] *= (1.0f - ProfileSum[iprf]*scale);
  }
}


/*
 * Release the six score buffers obtained by AllocateScores().
 *
 * Every pointer is reset to NULL after its buffer is released and AlignStep
 * is reset to 0, so calling the function again on the same object is a no-op
 * instead of a double free.
 */
static inline void FreeScores(union Scores * const matrices)
{
  if (matrices->Insertion.Alphabet != NULL) _mm_free(matrices->Insertion.Alphabet);
  matrices->Insertion.Alphabet = NULL;

  if (matrices->Insertion.Boundaries != NULL) _mm_free(matrices->Insertion.Boundaries);
  matrices->Insertion.Boundaries = NULL;

  if (matrices->Insertion.Transitions != NULL) _mm_free(matrices->Insertion.Transitions);
  matrices->Insertion.Transitions = NULL;

  if (matrices->Insertion.FirstSequenceProtein != NULL) _mm_free(matrices->Insertion.FirstSequenceProtein);
  matrices->Insertion.FirstSequenceProtein = NULL;

  if (matrices->Insertion.LastSequenceProtein != NULL) _mm_free(matrices->Insertion.LastSequenceProtein);
  matrices->Insertion.LastSequenceProtein = NULL;

  matrices->Insertion.AlignStep = 0;

  if (matrices->Match.Alphabet != NULL) _mm_free(matrices->Match.Alphabet);
  matrices->Match.Alphabet = NULL;
}

/*
 * Advance the Alphabet, Boundaries and Transitions cursors by one profile row
 * (AlignStep shorts, INSERTION_BOUNDARIES_SIZE shorts and one TransitionScores).
 */
static inline void NextInsertionProfile( struct SInsertion * Insertion)
{
  Insertion->Alphabet    = Insertion->Alphabet    + Insertion->AlignStep;
  Insertion->Boundaries  = Insertion->Boundaries  + INSERTION_BOUNDARIES_SIZE;
  Insertion->Transitions = Insertion->Transitions + 1;
}

/*
 * Move the Alphabet, Boundaries and Transitions cursors back by one profile
 * row; exact inverse of NextInsertionProfile().
 */
static inline void PreviousInsertionProfile( struct SInsertion * Insertion)
{
  Insertion->Alphabet    = Insertion->Alphabet    - Insertion->AlignStep;
  Insertion->Boundaries  = Insertion->Boundaries  - INSERTION_BOUNDARIES_SIZE;
  Insertion->Transitions = Insertion->Transitions - 1;
}

static inline void CopyPreviousInsertionProfile(struct SInsertion * Insertion)
{
  const size_t step       = Insertion->AlignStep;
  memcpy(Insertion->Alphabet, Insertion->Alphabet - step, step*sizeof(short int));
  memcpy(Insertion->Boundaries, Insertion->Boundaries - INSERTION_BOUNDARIES_SIZE, INSERTION_BOUNDARIES_SIZE*sizeof(short int));
  memcpy(Insertion->Transitions, Insertion->Transitions - 1, sizeof(TransitionScores));
}

/* Advance the match Alphabet cursor by one profile row (AlignStep shorts). */
static inline void NextMatchProfile( struct SMatch * Match)
{
  Match->Alphabet = Match->Alphabet + Match->AlignStep;
}

/* Move the match Alphabet cursor back by one profile row (AlignStep shorts). */
static inline void PreviousMatchProfile( struct SMatch * Match)
{
  Match->Alphabet = Match->Alphabet - Match->AlignStep;
}

/*
 * Resolve a one- or two-character key to the score slot it designates in the
 * current insertion row.
 *
 * key       : "I" (insertion alphabet vector), "I0", or a two-letter code such
 *             as "MI", "BE" or "E1"; only key[0] and key[1] are examined.
 * Insertion : cursors of the current row.
 * pointer   : receives the slot address; left untouched on error.
 *
 * Returns 0 for a single value, 1 for a vector and 2 for an unknown key.
 */
static size_t GetInsertionMemory(const char * const key, struct SInsertion * const Insertion, short int ** pointer)
{
  enum { SINGLE = 0, VECTOR = 1, UNKNOWN = 2 };

  short int * const Alphabet    = Insertion->Alphabet;
  short int * const Boundaries  = Insertion->Boundaries;
  short int * const Transitions = Insertion->Transitions->Element;

  short int * slot = NULL;
  size_t status = SINGLE;

  /* key[1] is only read once key[0] is known to be a valid first letter */
  switch (key[0]) {
    case 'I':
      switch (key[1]) {
        case '\0': slot = Alphabet    +   1; status = VECTOR; break;
        case '0' : slot = Alphabet    +   0; break;
        case 'M' : slot = Transitions + _IM; break;
        case 'I' : slot = Transitions + _II; break;
        case 'D' : slot = Transitions + _ID; break;
        case 'E' : slot = Boundaries  + _IE; break;
        default  : status = UNKNOWN;
      }
      break;
    case 'B':
      switch (key[1]) {
        case '0' : slot = Boundaries + _B0; break;
        case '1' : slot = Boundaries + _B1; break;
        case 'M' : slot = Boundaries + _BM; break;
        case 'I' : slot = Boundaries + _BI; break;
        case 'D' : slot = Boundaries + _BD; break;
        case 'E' : slot = Boundaries + _BE; break;
        default  : status = UNKNOWN;
      }
      break;
    case 'E':
      switch (key[1]) {
        case '0' : slot = Boundaries + _E0; break;
        case '1' : slot = Boundaries + _E1; break;
        default  : status = UNKNOWN;
      }
      break;
    case 'M':
      switch (key[1]) {
        case 'M' : slot = Transitions + _MM; break;
        case 'I' : slot = Transitions + _MI; break;
        case 'D' : slot = Transitions + _MD; break;
        case 'E' : slot = Boundaries  + _ME; break;
        default  : status = UNKNOWN;
      }
      break;
    case 'D':
      switch (key[1]) {
        case 'M' : slot = Transitions + _DM; break;
        case 'I' : slot = Transitions + _DI; break;
        case 'D' : slot = Transitions + _DD; break;
        case 'E' : slot = Boundaries  + _DE; break;
        default  : status = UNKNOWN;
      }
      break;
    default:
      status = UNKNOWN;
  }

  if (status == UNKNOWN) return UNKNOWN;

  *pointer = slot;
  return status;
}

/*
 * Profile loading and release, implemented elsewhere.
 * NOTE(review): the meaning of ReadProfile's int result is not visible in
 * this header - confirm against its definition.
 */
int ReadProfile(const char * const restrict FileName, struct Profile * const prf);
void FreeProfile(struct Profile * const prf);
/*
 * xali1: three variants with an identical signature, named after the
 * instruction set they target (SSE2, SSE4.1, SSE4a). Implemented elsewhere.
 * Each takes a profile, a sequence, a WORK buffer, the BSEQ and LSEQ
 * positions, a cutoff and the LOPT flag.
 * NOTE(review): the required size of WORK and the meaning of the result are
 * not visible in this header.
 */
int xali1_sse2 (const struct Profile * const restrict prf, const unsigned char * const restrict Sequence,
                int * const WORK, const size_t BSEQ, const size_t LSEQ, const int CutOff, const _Bool LOPT);
int xali1_sse41(const struct Profile * const restrict prf, const unsigned char * const restrict Sequence,
                int * const WORK, const size_t BSEQ, const size_t LSEQ, const int CutOff, const _Bool LOPT);
int xali1_sse4a(const struct Profile * const restrict prf, const unsigned char * const restrict Sequence,
                int * const WORK, const size_t BSEQ, const size_t LSEQ, const int CutOff, const _Bool LOPT);
/*
 * xalip: SSE2 and SSE4.1 variants with an identical signature, implemented
 * elsewhere. They work on caller-supplied score (iop) and position (iom, ioi)
 * buffers and on an array of struct Alignment.
 * NOTE(review): buffer sizes, the role of Lock / N1 / N2 / nali and the
 * meaning of the result are not visible in this header.
 */
int xalip_sse2(const struct Profile * const restrict prf, const unsigned char * const restrict Sequence,
	       union lScores * const restrict iop, union Positions * const restrict iom,
	       union Positions * const restrict ioi, const size_t bseq, const size_t lseq,
	       struct Alignment * const restrict alignment,
	       _Bool * const restrict Lock, const size_t N1, const size_t N2, const _Bool Lopt,
	       const int kcut, const size_t nali);
int xalip_sse41(const struct Profile * const restrict prf, const unsigned char * const restrict Sequence,
		union lScores * const restrict iop, union Positions * const restrict iom,
		union Positions * const restrict ioi, const size_t bseq, const size_t lseq,
		struct Alignment * const restrict alignment,
		_Bool * const restrict Lock, const size_t N1, const size_t N2, const _Bool Lopt,
		const int kcut, const size_t nali);
/*
 * xalit: SSE2 and SSE4.1 variants with an identical signature, implemented
 * elsewhere. CALI is a caller-supplied character buffer that the routine may
 * write; Lock is read-only here.
 * NOTE(review): the required size of CALI and the meaning of the result are
 * not visible in this header.
 */
int xalit_sse2(const struct Profile * const restrict prf, const size_t N1, const size_t N2, const size_t bseq, const size_t lseq,
	       const unsigned char * const restrict Sequence, char * const restrict CALI,
	       union lScores * const restrict iop,
	       struct Alignment * const restrict alignment, const _Bool * const restrict Lock);       
int xalit_sse41(const struct Profile * const restrict prf, const size_t N1, const size_t N2, const size_t bseq, const size_t lseq,
		const unsigned char * const restrict Sequence, char * const restrict CALI,
		union lScores * const restrict iop,
		struct Alignment * const restrict alignment, const _Bool * const restrict Lock);           

/*
 * Heuristic scoring routines, implemented elsewhere.
 * The Transpose* variants take a transposed match matrix: float for the SSE2
 * versions, int for the SSE4.1 versions - the element types produced by the
 * TransposeAndConvert*MatchMatrix helpers in this header. The GivenMemory
 * variants additionally take a caller-supplied work buffer.
 * NOTE(review): the required size of Memory is not visible in this header.
 */
unsigned int heuristic(const struct Profile * const restrict prf, const PFSequence * const restrict Sequence,
		       const unsigned int CutOff);
unsigned int TransposeHeuristic_sse2(const float * const restrict TransposeMatch, const size_t Alphabet_Length,
				     const size_t Profile_Length, const PFSequence * const restrict Sequence);
unsigned int TransposeHeuristic_sse41(const int * const restrict TransposeMatch, const size_t Alphabet_Length,
				      const size_t Profile_Length, const PFSequence * const restrict Sequence);
unsigned int TransposeHeuristicGivenMemory_sse2(const float * const restrict TransposeMatch, float * const Memory,
						const size_t Alphabet_Length, const size_t Profile_Length,
						const PFSequence * const restrict Sequence);
unsigned int TransposeHeuristicGivenMemory_sse41(const int * const restrict TransposeMatch, int * const Memory,
						 const size_t Alphabet_Length, const size_t Profile_Length,
						 const PFSequence * const restrict Sequence);

/*
 * Set-up, release and use of an SAverage, implemented elsewhere.
 * NOTE(review): presumably InitAverage fills Average from the score matrices
 * and FreeAverage releases what InitAverage allocated - confirm.
 */
void InitAverage(const union Scores * Matrices, const size_t prfLength, const size_t AlphabetLength, SAverage * const Average);
void FreeAverage(SAverage * const Average);
float ComputeAverageFrequencies(const PFSequence * const Sequence, SAverage * const Average);

/*
 * Score conversions, implemented elsewhere. The N2R_* functions match
 * NormalizedToRawFunctionPtr (float in, int out) and the R2N_* functions
 * match RawToNormalizedFunctionPtr (int in, float out).
 * NOTE(review): what distinguishes variants 1, 2 and 3 is not visible in this
 * header.
 */
int N2R_1(const float R, const size_t SeqLength, const float AverageValue);
int N2R_2(const float R, const size_t SeqLength, const float AverageValue);
int N2R_3(const float R, const size_t SeqLength, const float AverageValue);
float R2N_1(const int N, const size_t SeqLength, const float AverageValue);
float R2N_2(const int N, const size_t SeqLength, const float AverageValue);
float R2N_3(const int N, const size_t SeqLength, const float AverageValue);

/******************* Output declarations *******************/
/*
 * PrintFunctionPtr is the common signature of the output routines below, so
 * any of them can be selected through a pointer of this type.
 * NOTE(review): the meaning of RAVE and N, and whether Header is modified,
 * are not visible in this header.
 */
typedef void (*PrintFunctionPtr)(const struct Profile * const, const char * * const, const struct Alignment * const,
				 char * const, const size_t, const float, const int);
void PrintSimple(const struct Profile * const prf, const char * * const AlignedSequence,
		 const struct Alignment * const alignment, char * const Header,
		 const size_t SequenceLength, const float RAVE, const int N);

void PrintDefault(const struct Profile * const prf, const char * * const AlignedSequence,
		 const struct Alignment * const alignment, char * const Header,
		 const size_t SequenceLength, const float RAVE, const int N);

void PrintInterpro(const struct Profile * const prf, const char * * const AlignedSequence,
		 const struct Alignment * const alignment, char * const Header,
		 const size_t SequenceLength, const float RAVE, const int N);

void PrintPfscan(const struct Profile * const prf, const char * * const AlignedSequence,
		 const struct Alignment * const alignment, char * const Header,
		 const size_t SequenceLength, const float RAVE, const int N);

void PrintTSV(const struct Profile * const prf, const char * * const AlignedSequence,
		 const struct Alignment * const alignment, char * const Header,
		 const size_t SequenceLength, const float RAVE, const int N);
#endif

