/* discrete.c
 * This file is a modification of discrete.c from gsl library.
 * Modified by G.Beliakov (gleb@deakin.edu.au) for ranlip package 
 * 
 * Copyright (C) 1996, 1997, 1998, 1999, 2000 James Theiler, Brian Gough
 * 
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or (at
 * your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
   Random Discrete Events
   
   Given K discrete events with different probabilities P[k]
   produce a value k consistent with its probability.

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.  You should have received
   a copy of the GNU General Public License along with this program;
   if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite
   330, Boston, MA 02111-1307 USA
*/

/*
 * Based on: Alastair J Walker, An efficient method for generating
 * discrete random variables with general distributions, ACM Trans
 * Math Soft 3, 253-256 (1977).  See also: D. E. Knuth, The Art of
 * Computer Programming, Volume 2 (Seminumerical algorithms), 3rd
 * edition, Addison-Wesley (1997), p120.

 * Walker's algorithm does some preprocessing, and provides two
 * arrays: floating point F[k] and integer A[k].  A value k is chosen
 * from 0..K-1 with equal likelihood, and then a uniform random number
 * u is compared to F[k].  If it is less than F[k], then k is
 * returned.  Otherwise, A[k] is returned.
   
 * Walker's original paper describes an O(K^2) algorithm for setting
 * up the F and A arrays.  I found this disturbing since I wanted to
 * use very large values of K.  I'm sure I'm not the first to realize
 * this, but in fact the preprocessing can be done in O(K) steps.

 * A figure of merit for the preprocessing is the average value for
 * the F[k]'s (that is, SUM_k F[k]/K); this corresponds to the
 * probability that k is returned, instead of A[k], thereby saving a
 * redirection.  Walker's O(K^2) preprocessing will generally improve
 * that figure of merit, compared to my cheaper O(K) method; from some
 * experiments with a perl script, I get values of around 0.6 for my
 * method and just under 0.75 for Walker's.  Knuth has pointed out
 * that finding _the_ optimum lookup tables, which maximize the
 * average F[k], is a combinatorially difficult problem.  But any
 * valid preprocessing will still provide O(1) time for the call to
 * gsl_ran_discrete().  I find that if I artificially set F[k]=1 --
 * ie, better than optimum! -- I get a speedup of maybe 20%, so that's
 * the maximum I could expect from the most expensive preprocessing.
 * Folding in the difference of 0.6 vs 0.75, I'd estimate that the
 * speedup would be less than 10%.

 * I've not implemented it here, but one compromise is to sort the
 * probabilities once, and then work from the two ends inward.  This
 * requires O(K log K), still lots cheaper than O(K^2), and from my
 * experiments with the perl script, the figure of merit is within
 * about 0.01 for K up to 1000, and no sign of diverging (in fact,
 * they seemed to be converging, but it's hard to say with just a
 * handful of runs).

 * The O(K) algorithm goes through all the p_k's and decides if they
 * are "smalls" or "bigs" according to whether they are less than or
 * greater than the mean value 1/K.  The indices to the smalls and the
 * bigs are put in separate stacks, and then we work through the
 * stacks together.  For each small, we pair it up with the next big
 * in the stack (Walker always wanted to pair up the smallest small
 * with the biggest big).  The small "borrows" from the big just
 * enough to bring the small up to mean.  This reduces the size of the
 * big, so the (smaller) big is compared again to the mean, and if it
 * is smaller, it gets "popped" from the big stack and "pushed" to the
 * small stack.  Otherwise, it stays put.  Since every time we pop a
 * small, we are able to deal with it right then and there, and we
 * never have to pop more than K smalls, then the algorithm is O(K).

 * This implementation sets up two separate stacks, and allocates K
 * elements between them.  Since neither stack ever grows, we do an
 * extra O(K) pass through the data to determine how many smalls and
 * bigs there are to begin with and allocate appropriately.  In all
 * there are 2*K*sizeof(double) transient bytes of memory that are
 * used then returned, and K*(sizeof(size_t)+sizeof(double)) bytes used
 * in the lookup table.
   
 * Walker spoke of using two random numbers (an integer 0..K-1, and a
 * floating point u in [0,1]), but Knuth points out that one can just
 * use the integer and fractional parts of K*u where u is in [0,1].
 * In fact, Knuth further notes that taking F'[k]=(k+F[k])/K, one can
 * directly compare u to F'[k] without having to explicitly set
 * u=K*u-int(K*u).

 * Usage:

 * Starting with an array of probabilities P, initialize and do
 * preprocessing with a call to:

 *    gsl_rng *r;
 *    gsl_ran_discrete_t *f;
 *    f = gsl_ran_discrete_preproc(K,P);
   
 * Then, whenever a random index 0..K-1 is desired, use

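 *    k = gsl_ran_discrete(f);
 *
 * (In this ranlip version gsl_ran_discrete() no longer takes the
 * gsl_rng argument, so the "gsl_rng *r" above is not needed; the
 * random numbers come from the file-level ranlux state, which is
 * seeded with ranlux_set_seed().)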

 * Note that several different randevent struct's can be
 * simultaneously active.

 * Aside: A very clever alternative approach is described in
 * Abramowitz and Stegun, p 950, citing: Marsaglia, Random variables
 * and computers, Proc Third Prague Conference in Probability Theory,
 * 1962.  A more accessible reference is: G. Marsaglia, Generating
 * discrete random numbers in a computer, Comm ACM 6, 37-38 (1963).
 * If anybody is interested, I (jt) have also coded up this version as
 * part of another software package.  However, I've done some
 * comparisons, and the Walker method is both faster and more stingy
 * with memory.  So, in the end I decided not to include it with the
 * GSL package.
   
 * Written 26 Jan 1999, James Theiler, jt@lanl.gov
 * Adapted to GSL, 30 Jan 1999, jt

 */

//#include <config.h>
#include <stdio.h>              /* used for NULL, also fprintf(stderr,...) */
#include <stdlib.h>             /* used for malloc's */
#include <math.h>
//#include <gsl/gsl_rng.h>
#include "ranlipdist.h"
#define DEBUG 0
#define KNUTH_CONVENTION 1      /* Saves a few steps of arithmetic
                                 * in the call to gsl_ran_discrete()
                                 */

/*** Begin Stack (this code is used just in this file) ***/

/* Stack code converted to use unsigned indices (i.e. s->i == 0 now
   means an empty stack, instead of -1), for consistency and to give a
   bigger allowable range. BJG */

typedef struct {
    size_t N;                      /* max number of elts on stack */
    size_t *v;                     /* array of values on the stack */
    size_t i;                      /* index of top of stack; 0 means empty */
} gsl_stack_t;

/*
 * new_stack: allocate an empty stack with room for N values.
 *
 * Returns a heap-allocated stack; the caller releases it with
 * free_stack().  The result is never NULL: running out of memory (or
 * asking for an N whose byte size does not fit in a size_t) is treated
 * as fatal, in the same way push_stack() and pop_stack() treat misuse --
 * a message is written to stderr and the process aborts.
 *
 * N == 0 is valid and yields a stack that can never be pushed to.
 */
static gsl_stack_t *
new_stack(size_t N) {
    gsl_stack_t *s;

    /* Guard the sizeof(size_t)*N multiplication below against wrap-around. */
    if (N > ((size_t)-1) / sizeof(size_t)) {
        fprintf(stderr,"Cannot allocate stack!\n");
        abort();
    }

    s = (gsl_stack_t *)malloc(sizeof(gsl_stack_t));
    if (s == NULL) {
        fprintf(stderr,"Cannot allocate stack!\n");
        abort();
    }
    s->N = N;
    s->i = 0;                  /* indicates stack is empty */
    s->v = (size_t *)malloc(sizeof(size_t)*N);

    /* malloc(0) is allowed to return NULL, so NULL is only an error
       when storage was actually requested. */
    if (s->v == NULL && N > 0) {
        fprintf(stderr,"Cannot allocate stack!\n");
        abort();
    }
    return s;
}

/* Single ranlux generator state used by the entry points in this file
   that take no explicit state pointer: gsl_ran_discrete(),
   ranlux_get_double_V() and ranlux_set_seed().
   NOTE(review): nothing in this file seeds it automatically, so it looks
   like ranlux_set_seed() has to be called before drawing -- confirm. */
static ranlux_state_t RLSTATE;

/* NOTE(review): TheSeed is not referenced anywhere else in this file;
   presumably other ranlip sources read it -- confirm before touching. */
int TheSeed=1;




/*
 * push_stack: put v on top of stack s.
 *
 * The stack has a fixed capacity (s->N); pushing onto a full stack is
 * fatal -- a message is printed to stderr and the process aborts.
 */
static void
push_stack(gsl_stack_t *s, size_t v)
{
    const size_t top = s->i;

    if (top >= s->N) {
        fprintf(stderr,"Cannot push stack!\n");
        abort();                /* FIXME: fatal!! */
    }

    s->v[top] = v;
    s->i = top + 1;
}

/*
 * pop_stack: remove and return the value on top of stack s.
 *
 * Popping an empty stack is fatal -- a message is printed to stderr and
 * the process aborts.
 */
static size_t pop_stack(gsl_stack_t *s)
{
    size_t top = s->i;

    if (top == 0) {
        fprintf(stderr,"Cannot pop stack!\n");
        abort();               /* FIXME: fatal!! */
    }

    top -= 1;                  /* s->i counts elements, so the top is at i-1 */
    s->i = top;
    return s->v[top];
}

/* size_stack: number of values currently held on stack s (0 when empty). */
static  size_t size_stack(const gsl_stack_t *s)
{
    const size_t count = s->i;

    return count;
}

/*
 * free_stack: release a stack obtained from new_stack(), i.e. both its
 * value array and the stack record itself.  s must not be NULL.
 */
static void free_stack(gsl_stack_t *s)
{
    size_t *values = s->v;

    free(values);
    free(s);
}

/*** End Stack ***/


/*** Begin Walker's Algorithm ***/



/*
 * gsl_ran_discrete_preproc: build Walker's alias tables for Kevents
 * discrete events whose relative weights are ProbArray[0..Kevents-1].
 *
 * The weights need not sum to one; they are normalised by their total.
 * (The total itself is not checked for being zero.)
 *
 * Returns a newly allocated lookup table to be released with
 * gsl_ran_discrete_free(), or NULL when
 *   - Kevents is 0 or ProbArray is NULL,
 *   - any weight is negative,
 *   - memory for the table or the work array cannot be obtained.
 *
 * On return g->A[k] holds the alias of column k and g->F[k] the
 * acceptance threshold of column k (stored as (k+F[k])/K when
 * KNUTH_CONVENTION is set).
 */
gsl_ran_discrete_t *
gsl_ran_discrete_preproc(size_t Kevents, const double *ProbArray)
{

    size_t k,b,s;
    gsl_ran_discrete_t *g;
    size_t nBigs, nSmalls;
    gsl_stack_t *Bigs;
    gsl_stack_t *Smalls;
    double *E;
    double pTotal = 0.0, mean, d;

    if (Kevents < 1 || ProbArray == NULL) {
        /* number of events must be a positive integer */
        return NULL;
    }

    /* Keep the sizeof(...)*Kevents products below from wrapping around. */
    if (Kevents > ((size_t)-1) / sizeof(double) ||
        Kevents > ((size_t)-1) / sizeof(size_t)) {
        return NULL;
    }

    /* Make sure elements of ProbArray[] are non-negative.
     * Won't enforce that sum is unity; instead will just normalize
     */
    for (k=0; k<Kevents; ++k) {
        if (ProbArray[k] < 0) {
            /* probabilities must be non-negative */
            return NULL;
        }
        pTotal += ProbArray[k];
    }

    /* Begin setting up the main "object" (just a struct, no steroids) */
    g = (gsl_ran_discrete_t *)malloc(sizeof(gsl_ran_discrete_t));
    if (g == NULL) {
        return NULL;
    }
    g->K = Kevents;
    g->F = (double *)malloc(sizeof(double)*Kevents);
    g->A = (size_t *)malloc(sizeof(size_t)*Kevents);

    /* E[k] is the normalised weight still to be placed for event k */
    E = (double *)malloc(sizeof(double)*Kevents);

    if (g->F == NULL || g->A == NULL || E == NULL) {
        free(E);
        free(g->A);
        free(g->F);
        free(g);
        return NULL;
    }

    for (k=0; k<Kevents; ++k) {
        E[k] = ProbArray[k]/pTotal;
    }

    /* Now create the Bigs and the Smalls.
     *
     * Each event is classified exactly once and the verdict is parked in
     * g->A[k] (0 = small, 1 = big), which is not needed for its real
     * purpose yet.  Evaluating E[k] < mean a second time when pushing
     * could, with excess floating-point precision, disagree with the
     * counts used to size the stacks and overflow one of them.
     */
    mean = 1.0/Kevents;
    nSmalls=nBigs=0;
    for (k=0; k<Kevents; ++k) {
        if (E[k] < mean) {
            (g->A)[k] = 0;
            ++nSmalls;
        }
        else {
            (g->A)[k] = 1;
            ++nBigs;
        }
    }
    /* new_stack() does not report allocation failure back to its caller,
       so there is nothing to test here. */
    Bigs   = new_stack(nBigs);
    Smalls = new_stack(nSmalls);
    for (k=0; k<Kevents; ++k) {
        push_stack((g->A)[k] ? Bigs : Smalls, k);
    }

    /* Now work through the smalls */
    while (size_stack(Smalls) > 0) {
        s = pop_stack(Smalls);
        if (size_stack(Bigs) == 0) {
            /* No big left to borrow from (only rounding can cause this):
             * the column keeps its own index.  Keep looping rather than
             * stopping, so that every small still on the stack gets its
             * A and F entries filled in as well.
             */
            (g->A)[s]=s;
            (g->F)[s]=1.0;
            continue;
        }
        b = pop_stack(Bigs);
        (g->A)[s]=b;
        (g->F)[s]=Kevents*E[s];
#if DEBUG
        fprintf(stderr,"s=%2lu, A=%2lu, F=%.4f\n",
                (unsigned long)s,(unsigned long)(g->A)[s],(g->F)[s]);
#endif
        d = mean - E[s];
        E[s] += d;              /* now E[s] == mean */
        E[b] -= d;
        if (E[b] < mean) {
            push_stack(Smalls,b); /* no longer big, join ranks of the small */
        }
        else if (E[b] > mean) {
            push_stack(Bigs,b); /* still big, put it back where you found it */
        }
        else {
            /* E[b]==mean implies it is finished too */
            (g->A)[b]=b;
            (g->F)[b]=1.0;
        }
    }
    while (size_stack(Bigs) > 0) {
        b = pop_stack(Bigs);
        (g->A)[b]=b;
        (g->F)[b]=1.0;
    }
    /* Stacks have been emptied, and A and F have been filled */


#if 0
    /* if 1, then artificially set all F[k]'s to unity.  This will
     * give wrong answers, but you'll get them faster.  But, not
     * that much faster (I get maybe 20%); that's an upper bound
     * on what the optimal preprocessing would give.
     */
    for (k=0; k<Kevents; ++k) {
        (g->F)[k] = 1.0;
    }
#endif

#if KNUTH_CONVENTION
    /* For convenience, set F'[k]=(k+F[k])/K */
    /* This saves some arithmetic in gsl_ran_discrete(); I find that
     * it doesn't actually make much difference.
     */
    for (k=0; k<Kevents; ++k) {
        (g->F)[k] += k;
        (g->F)[k] /= Kevents;
    }
#endif

    free_stack(Bigs);
    free_stack(Smalls);
    free(E);

    return g;
}


/*
 * gsl_ran_discrete: draw one event index in 0..K-1 from the lookup
 * table g built by gsl_ran_discrete_preproc().
 *
 * A uniform number u is taken from the file-level ranlux state.  The
 * integer part of K*u selects a column; the column's own index is
 * returned when u falls below the column's threshold F, otherwise the
 * column's alias A is returned.
 */
size_t gsl_ran_discrete(/*const gsl_rng *r,*/ const gsl_ran_discrete_t *g)
{
    double u = ranlux_get_double(&RLSTATE);
    double threshold;
    size_t column;

#if KNUTH_CONVENTION
    /* F already stores (k+F[k])/K, so u is compared unscaled */
    column = (size_t)(u*(g->K));
#else
    /* split K*u into its integer part (column) and fraction (u) */
    u *= g->K;
    column = (size_t)u;
    u -= column;
#endif

    threshold = (g->F)[column];
    if (threshold == 1.0 || u < threshold) {
        return column;
    }
    return (g->A)[column];
}

/*
 * gsl_ran_discrete_free: release a lookup table returned by
 * gsl_ran_discrete_preproc() (its A and F arrays and the struct itself).
 *
 * Passing NULL is allowed and does nothing, mirroring free(NULL).
 */
void gsl_ran_discrete_free(gsl_ran_discrete_t *g)
{
    if (g == NULL) {
        return;
    }
    free(g->A);
    free(g->F);
    free(g);
}

/*
 * gsl_ran_discrete_pdf: probability with which gsl_ran_discrete() returns
 * event k, reconstructed from the lookup table g.
 *
 * Every column i contributes its acceptance fraction F[i] to event i and
 * the remainder 1-F[i] to its alias A[i]; the sum of the contributions
 * to k is divided by the number of columns K.
 *
 * Returns 0 for any k outside 0..K-1 (which also covers an empty table,
 * avoiding a 0/0 result).
 */
double
gsl_ran_discrete_pdf(size_t k, const gsl_ran_discrete_t *g)
{
    size_t i,K;
    double f,p=0;
    K= g->K;
    if (k>=K) return 0;         /* valid events are 0..K-1 */
    for (i=0; i<K; ++i) {
        f = (g->F)[i];
#if KNUTH_CONVENTION
        /* undo the stored form F'[i]=(i+F[i])/K */
        f = K*f-i;
#endif
        if (i==k) {
            p += f;
        } else if (k == (g->A)[i]) {
            p += 1.0 - f;
        }
    }
    return p/K;
}

/* rng/ranlux.c
 * 
 * Copyright (C) 1996, 1997, 1998, 1999, 2000 James Theiler, Brian Gough
 * 
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or (at
 * your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

//#include <config.h>
//#include <stdlib.h>
//#include <gsl/gsl_rng.h>

/* This is a lagged fibonacci generator with skipping developed by Luescher.
   The sequence is a series of 24-bit integers, x_n, 

   x_n = d_n + b_n

   where d_n = x_{n-10} - x_{n-24} - c_{n-1}, b_n = 0 if d_n >= 0 and
   b_n = 2^24 if d_n < 0, c_n = 0 if d_n >= 0 and c_n = 1 if d_n < 0,
   where after 24 samples a group of p integers are "skipped", to
   reduce correlations. By default p = 199, but can be increased to
   365.

   The period of the generator is around 10^171. 

   From: M. Luescher, "A portable high-quality random number generator
   for lattice field theory calculations", Computer Physics
   Communications, 79 (1994) 100-110.

   Available on the net as hep-lat/9309020 at http://xxx.lanl.gov/

   See also,

   F. James, "RANLUX: A Fortran implementation of the high-quality
   pseudo-random number generator of Luscher", Computer Physics
   Communications, 79 (1994) 111-114

   Kenneth G. Hamilton, F. James, "Acceleration of RANLUX", Computer
   Physics Communications, 101 (1997) 241-248

   Kenneth G. Hamilton, "Assembler RANLUX for PCs", Computer Physics
   Communications, 101 (1997) 249-253  */


/*
 * increment_state: advance the generator by one step and return the
 * newly produced value.
 *
 * Computes u[j] - u[i] - carry, records in state->carry whether any
 * mask_hi bit of the difference is set (keeping only the mask_lo bits
 * in that case), stores the result in u[i], and then moves both
 * indices i and j one position down, wrapping from 0 back to 23.
 */
static  unsigned long int
increment_state (ranlux_state_t * state)
{
  const unsigned int i = state->i;
  const unsigned int j = state->j;
  long int delta = state->u[j] - state->u[i] - state->carry;
  const int borrow = (delta & mask_hi) != 0;

  if (borrow)
    delta &= mask_lo;
  state->carry = borrow;

  state->u[i] = delta;

  /* step both lags backwards through the 24-entry ring */
  state->i = (i == 0) ? 23 : i - 1;
  state->j = (j == 0) ? 23 : j - 1;

  return delta;
}

/*
 * ranlux_get: return the next integer of the sequence.
 *
 * One generator step supplies the result.  state->n counts the values
 * handed out; when it reaches 24 it is reset and state->skip further
 * steps are performed and thrown away before returning.
 */
static  unsigned long int
ranlux_get (void *vstate)
{
  ranlux_state_t *state = (ranlux_state_t *) vstate;
  const unsigned int skip = state->skip;
  const unsigned long int r = increment_state (state);
  unsigned int discarded;

  state->n++;
  if (state->n != 24)
    return r;

  /* a block of 24 has been delivered: discard the next `skip` values */
  state->n = 0;
  for (discarded = 0; discarded < skip; discarded++)
    increment_state (state);

  return r;
}

/*
 * ranlux_get_double: next value of the sequence scaled to a double.
 *
 * The integer from ranlux_get() is divided by a constant slightly above
 * 2^24, so that (per the original author's note) the result lies in
 * [0,1) rather than [0,1].
 */
static double
ranlux_get_double (void *vstate)
{
  const double divisor = 16777216.01; //GB: we need random numbers on [0,1), not [0,1]!
  const unsigned long int raw = ranlux_get (vstate);

  return raw / divisor;
}

/* Public, argument-free entry point: returns the next double produced by
   ranlux_get_double() from the file-level generator state RLSTATE. */
double ranlux_get_double_V() 
{
	return ranlux_get_double(&RLSTATE);
}

/*
 * ranlux_set_lux: initialise a generator state from seed s.
 *
 * vstate  points to the ranlux_state_t to fill in.
 * s       seed; 0 selects the default seed 314159265.
 * luxury  determines how many values ranlux_get() discards after each
 *         block of 24: state->skip is set to luxury - 24.
 *
 * The 24 entries of u[] are produced by the initialisation recurrence
 * of F. James; the lags start at i = 23, j = 9 and the carry is taken
 * from the mask_hi bits of u[23].
 */
static void
ranlux_set_lux (void *vstate, unsigned long int s, unsigned int luxury)
{
  ranlux_state_t *state = (ranlux_state_t *) vstate;
  long int lcg;
  int slot;

  /* default seed is 314159265 */
  lcg = (s == 0) ? 314159265 : s;

  /* This is the initialization algorithm of F. James, widely in use
     for RANLUX. */
  for (slot = 0; slot < 24; slot++)
    {
      unsigned long int quot = lcg / 53668;

      lcg = 40014 * (lcg - quot * 53668) - quot * 12211;
      if (lcg < 0)
	lcg += 2147483563;
      state->u[slot] = lcg % two24;
    }

  state->i = 23;
  state->j = 9;
  state->n = 0;
  state->skip = luxury - 24;
  state->carry = (state->u[23] & mask_hi) ? 1 : 0;
}

/* Seed the state at vstate with s using luxury value 223 (see
   ranlux_set_lux).
   NOTE(review): static and not called anywhere in this file -- confirm
   whether it is still needed. */
static void
ranlux_set (void *vstate, unsigned long int s)
{
  ranlux_set_lux (vstate, s, 223);
}

/* Public seeding entry point: initialise the file-level generator state
   RLSTATE from seed s with luxury value 223.  As in ranlux_set_lux(),
   s == 0 selects the default seed. */
void ranlux_set_seed (unsigned long int s)
{
  ranlux_set_lux (&RLSTATE, s, 223);
}

/* Seed the state at vstate with s using luxury value 389 (see
   ranlux_set_lux).
   NOTE(review): static and not called anywhere in this file -- confirm
   whether it is still needed. */
static void
ranlux389_set (void *vstate, unsigned long int s)
{
  ranlux_set_lux (vstate, s, 389);
}

