/*
 * Experimental data distribution table generator
 * Taken from the uncopyrighted NISTnet code (public domain).
 *
 * Read in a series of "random" data values, either obtained
 * experimentally or generated from some probability distribution.
 * From this, create the inverse distribution table used to approximate
 * the distribution.
 */
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include <math.h>
#include <malloc.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>


/*
 * Read whitespace-separated floating point values from fp until end of
 * input or the first token that does not parse as a number.
 *
 * fp      stream to read from (regular file or pipe)
 * number  out: how many values were read
 *
 * Returns a heap buffer holding the values; the caller must free() it.
 * The buffer is never NULL, even when *number is 0.  Exits with status 3
 * if memory cannot be obtained.
 */
double *
readdoubles(FILE *fp, int *number)
{
	double *x;
	double value;
	int capacity = 1024;
	int n = 0;

	x = calloc(capacity, sizeof *x);
	if (!x) {
		perror("double alloc");
		exit(3);
	}

	/*
	 * Drive the loop by fscanf()'s return value rather than feof():
	 * a final value that is not followed by whitespace sets EOF on the
	 * same call that successfully converts it, and must still be kept.
	 */
	while (fscanf(fp, "%lf", &value) == 1) {
		if (n == capacity) {
			double *grown;

			/* Keep the count representable as int and the byte
			 * size representable as size_t before doubling. */
			if (capacity > INT_MAX/2 ||
			    (size_t)capacity*2 > (size_t)-1/sizeof *x) {
				fprintf(stderr, "too many input values\n");
				exit(3);
			}
			capacity *= 2;
			grown = realloc(x, (size_t)capacity*sizeof *x);
			if (!grown) {
				perror("double alloc");
				exit(3);
			}
			x = grown;
		}
		x[n++] = value;
	}

	/* A conversion failure that is not end-of-file means there is a
	 * non-numeric token; it would never be consumed, so stop here. */
	if (!feof(fp) && !ferror(fp))
		fprintf(stderr, "warning: stopped at non-numeric input after %d values\n", n);

	*number = n;
	return x;
}

void
arraystats(double *x, int limit, double *mu, double *sigma, double *rho)
{
	int n=0, i;
	double sumsquare=0.0, sum=0.0, top=0.0;
	double sigma2=0.0;

	for (i=0; i<limit; ++i){
		sumsquare += x[i]*x[i];
		sum += x[i];
		++n;
	}
	*mu = sum/(double)n;
	*sigma = sqrt((sumsquare - (double)n*(*mu)*(*mu))/(double)(n-1));

	for (i=1; i < n; ++i){
		top += ((double)x[i]- *mu)*((double)x[i-1]- *mu);
		sigma2 += ((double)x[i-1] - *mu)*((double)x[i-1] - *mu);

	}
	*rho = top/sigma2;
}

/* Create a (normalized) distribution table from a set of observed
 * values.  The table is fixed to run from (as it happens) -4 to +4,
 * with granularity .00002.
 */

/* Number of entries in the inverse table that main() builds and prints.
 * Parenthesized so the macro stays a single value inside any expression
 * (e.g. N/TABLESIZE).
 */
#define TABLESIZE	(16384/4)
/* Scale applied to normalized values before they are stored as shorts. */
#define TABLEFACTOR	8192
#ifndef MINSHORT
#define MINSHORT	(-32768)
#define MAXSHORT	32767
#endif

/* Since entries in the inverse are scaled by TABLEFACTOR, and can't be bigger
 * than MAXSHORT, we don't bother looking at a larger domain than this:
 */
#define DISTTABLEDOMAIN ((MAXSHORT/TABLEFACTOR)+1)
/* Histogram buckets per unit of normalized value. */
#define DISTTABLEGRANULARITY 50000
/* Buckets covering -DISTTABLEDOMAIN .. +DISTTABLEDOMAIN. */
#define DISTTABLESIZE (DISTTABLEDOMAIN*DISTTABLEGRANULARITY*2)

/*
 * Build a histogram of the normalized samples.
 *
 * x, limit  the samples and how many there are
 * mu, sigma mean and standard deviation used to normalize each sample
 *
 * Returns a calloc()ed array of DISTTABLESIZE counts that the caller must
 * free().  Bucket i counts samples whose normalized value rounds to
 * i/DISTTABLEGRANULARITY - DISTTABLEDOMAIN; values outside that range are
 * counted in the first or last bucket.  Exits with status 3 if the table
 * cannot be allocated.
 */
static int *
makedist(double *x, int limit, double mu, double sigma)
{
	int *table;
	int i, index;
	double input, scaled;

	table = calloc(DISTTABLESIZE, sizeof(int));
	if (!table) {
		perror("table alloc");
		exit(3);
	}

	for (i=0; i < limit; ++i) {
		/* Normalize value.  A zero or NaN sigma cannot be divided
		 * by; such samples carry no spread, so place them at the
		 * mean. */
		if (sigma > 0.0 || sigma < 0.0)
			input = (x[i]-mu)/sigma;
		else
			input = 0.0;

		/*
		 * Clamp while still in floating point: converting NaN, an
		 * infinity or a value beyond the range of int to int is
		 * undefined.  The negated comparison also sends NaN to the
		 * first bucket.
		 */
		scaled = rint((input+DISTTABLEDOMAIN)*DISTTABLEGRANULARITY);
		if (!(scaled >= 0.0))
			index = 0;
		else if (scaled >= DISTTABLESIZE)
			index = DISTTABLESIZE-1;
		else
			index = (int)scaled;
		++table[index];
	}
	return table;
}

/*
 * Turn a histogram into its running sum, in place.
 *
 * After the call table[k] holds the sum of the original table[0..k], and
 * *total holds the sum of all `limit` original entries (0 when limit is
 * not positive).
 */
static void
cumulativedist(int *table, int limit, int *total)
{
	int running = 0;
	int k;

	for (k = 0; k < limit; ++k) {
		running += table[k];
		table[k] = running;
	}
	*total = running;
}

/*
 * Invert a cumulative histogram.
 *
 * table        cumulative counts, tablesize entries
 * inversesize  number of entries in the result
 * tablesize    number of entries in table
 * cumulative   total sample count (the final cumulative value)
 *
 * For each bucket, the cumulative fraction selects a slot in the result
 * and the bucket's normalized position, scaled by TABLEFACTOR and limited
 * to MINSHORT+1..MAXSHORT, is stored there.  Slots that no bucket maps to
 * keep the marker value MINSHORT.
 *
 * Returns a malloc()ed array of inversesize shorts that the caller must
 * free().  Exits with status 3 if it cannot be allocated.
 */
static short *
inverttable(int *table, int inversesize, int tablesize, int cumulative)
{
	int i, inverseindex, inversevalue;
	short *inverse;
	double findex, fvalue;

	inverse = malloc(inversesize*sizeof(short));
	if (!inverse) {
		perror("inverse alloc");
		exit(3);
	}
	for (i=0; i < inversesize; ++i) {
		inverse[i] = MINSHORT;
	}

	/* With no samples there is no fraction to compute (division by
	 * zero); with no slots there is nowhere to store a value. */
	if (cumulative <= 0 || inversesize <= 0)
		return inverse;

	for (i=0; i < tablesize; ++i) {
		findex = ((double)i/(double)DISTTABLEGRANULARITY) - DISTTABLEDOMAIN;
		fvalue = (double)table[i]/(double)cumulative;
		inverseindex = (int)rint(fvalue*inversesize);
		inversevalue = (int)rint(findex*TABLEFACTOR);
		if (inversevalue <= MINSHORT) inversevalue = MINSHORT+1;
		if (inversevalue > MAXSHORT) inversevalue = MAXSHORT;
		/* Once the cumulative count reaches the total, fvalue is 1.0
		 * and the computed slot is inversesize, one past the end of
		 * the array; keep the write inside the allocation. */
		if (inverseindex >= inversesize) inverseindex = inversesize-1;
		if (inverseindex < 0) inverseindex = 0;
		inverse[inverseindex] = inversevalue;
	}
	return inverse;

}

/*
 * Fill every entry still holding the MINSHORT marker by integer linear
 * interpolation between the nearest real entries on either side.
 *
 * Before the first real entry the left anchor is the value MINSHORT at
 * position -1; after the last real entry the right anchor is the value
 * MAXSHORT at position `limit`.
 */
static void
interpolatetable(short *table, int limit)
{
	int anchor_value = MINSHORT;	/* most recent real entry ... */
	int anchor_pos = -1;		/* ... and where it was found */
	int pos = 0;

	while (pos < limit) {
		int next, target, span;

		if (table[pos] != MINSHORT) {
			/* A real entry: it becomes the new left anchor. */
			anchor_value = table[pos];
			anchor_pos = pos;
			++pos;
			continue;
		}

		/* Find the end of this run of missing entries. */
		next = pos;
		while (next < limit && table[next] == MINSHORT)
			++next;

		if (next < limit) {
			target = table[next];
			span = next - anchor_pos;
		} else {
			target = MAXSHORT;
			span = limit - anchor_pos;
		}

		/* Both anchors are fixed for the whole run, so fill it in
		 * one sweep. */
		for (; pos < next; ++pos)
			table[pos] = anchor_value +
				(pos - anchor_pos)*(target - anchor_value)/span;
	}
}

/*
 * Write the table to standard output: one comment line, then the `limit`
 * entries in decimal, eight per line.  Entries within a line are separated
 * by a single space and every eighth entry is followed by a newline.
 */
static void
printtable(const short *table, int limit)
{
	int pos = 0;

	printf("# This is the distribution table for the experimental distribution.\n");

	while (pos < limit) {
		char sep;

		if (pos % 8 == 7)
			sep = '\n';
		else
			sep = ' ';
		printf("%d%c", table[pos], sep);
		++pos;
	}
}

/*
 * Read samples from the file named by argv[1] (or standard input when no
 * argument is given), build the inverse distribution table and print it
 * on standard output.
 *
 * Exit status: 0 on success, 1 if the input file cannot be opened, 2 if
 * no values were read or the values have no usable spread.
 */
int
main(int argc, char **argv)
{
	FILE *fp;
	double *x;
	double mu, sigma, rho;
	int limit;
	int *table;
	short *inverse;
	int total;

	if (argc > 1) {
		fp = fopen(argv[1], "r");
		if (!fp) {
			perror(argv[1]);
			exit(1);
		}
	} else {
		fp = stdin;
	}
	x = readdoubles(fp, &limit);
	/* All input has been consumed; release the file if we opened it. */
	if (fp != stdin)
		fclose(fp);
	if (limit <= 0) {
		fprintf(stderr, "Nothing much read!\n");
		exit(2);
	}
	arraystats(x, limit, &mu, &sigma, &rho);
#ifdef DEBUG
	fprintf(stderr, "%d values, mu %10.4f, sigma %10.4f, rho %10.4f\n",
		limit, mu, sigma, rho);
#endif

	/* Normalization divides by sigma.  A single sample or constant data
	 * gives a zero or NaN sigma; the negated test rejects both. */
	if (!(sigma > 0.0)) {
		fprintf(stderr, "Input has no spread; cannot build a distribution table\n");
		free(x);
		exit(2);
	}

	table = makedist(x, limit, mu, sigma);
	free(x);
	cumulativedist(table, DISTTABLESIZE, &total);
	inverse = inverttable(table, TABLESIZE, DISTTABLESIZE, total);
	free(table);
	interpolatetable(inverse, TABLESIZE);
	printtable(inverse, TABLESIZE);
	free(inverse);
	return 0;
}