#!/usr/bin/perl -w

##############################################################################
#
# Print billing management system - stats analysis tool, version 4.1.2
#
# Copyright (C) 2002, 2003 Daniel Franklin
#
# This program is distributed under the terms of the GNU General Public
# License Version 2.
#
# This script should be called from a cron-job running as lpd user. Various
# PNG files are written out, plus textual stats information is dumped to
# stdout.
#
##############################################################################

use strict;
use Printbill::printbill_pcfg;
use Getopt::Long;
use POSIX;
use Printbill::PTDB_File;

# This script should be called from a cron job running as the lpd user. One
# PNG file is written out for each graph (hourly load and histograms).

# Site configuration, loaded into %params by pcfg().

my $config = '/etc/printbill/printbillrc';
my %params = pcfg ($config);

# File-scoped working variables shared by the rest of the script.

my ($nslices, %opt, $printer, @stats, $sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst, $now, @weekly, @monthly, @annually, @overall, $i);
my (@ptime, @ctime, @filesize, @pages, @cyan, @magenta, @yellow, @black, @dist, $path, $png_url, @time_per_page, %printerhash, $colourspace, $fraction);
my (@cyan_coverage_per_page, @magenta_coverage_per_page, @yellow_coverage_per_page, @black_coverage_per_page, $web);

# Leave unrecognised options alone and allow option bundling. The older
# spelling of the configuration call is used on very old perls.

my @getopt_settings = ("pass_through", "bundling");

if ($] >= 5.005) {
	Getopt::Long::Configure (@getopt_settings);
} else {
	Getopt::Long::config (@getopt_settings);
}

# Defaults, any of which may be overridden on the command line.

($printer, $fraction, $nslices, $path) = ("lp", 1, 20, ".");

GetOptions (
	\%opt,
	"fraction=f" => \$fraction,
	"printer=s" => \$printer,
	"help!",
	"slices=i" => \$nslices,
	"path=s" => \$path,
	"png_url=s" => \$png_url,
	"web!",
);

# --help: print the option summary and stop before the printer database or
# the stats file is opened.

if ($opt{help}) {
	print <<"END_OF_USAGE";
\n\n$0 - generate stats and graphs for specified printer. Options:

	--printer <printer>
		generate stats for printer <printer>

	--path <path>
		writes PNG files out to path <path>
			
	--fraction <fraction>
		only consider the <fraction> smallest values (e.g. .95)

	--slices <slices>
		use <slices> bins in frequency histograms

	--web --png_url <url>

		generates html instead of plain text, also <url> should be
		the same location as that specified for --path, but
		expressed relative to the document root of the web server
		(e.g., if --path is /var/www/printbill, --png_url should be
		/printbill)

	--help
		tells you this

END_OF_USAGE
	exit;
}

# Fetch the printer's colourspace from its database entry. The value decides
# which of the CMY and black graphs and statistics are produced below.

my $printer_db = "$params{'db_home'}/printers/$printer.db";

tie (%printerhash, "Printbill::PTDB_File", $printer_db, "TRUE")
	or die "$0: cannot open file $printer_db: $!\n";

$colourspace = $printerhash{"colourspace"};

untie %printerhash;

# Read the stats file. Every row must hold 11 tab-separated fields, used
# below as follows:
#
#	0	time of the job, in seconds since the epoch
#	1, 2	added together to give the parent processing time
#	3, 4	added together to give the child processing time
#	5	file size
#	6	pages printed
#	7-10	cyan, magenta, yellow and black coverage

$now = time;

my $stats_file = "$params{'stats_path'}/printbill_stats_$printer.dat";

open (my $stats_fh, '<', $stats_file)
	or die "$0: unable to open $stats_file: $!\n";

# Length of each reporting window in seconds. These are spelled out as
# products so the figures can be checked at a glance.

my $secs_per_day = 24 * 60 * 60;
my $week_window = 7 * $secs_per_day;
my $month_window = 30 * $secs_per_day;
my $year_window = 365.25 * $secs_per_day;

# Pages printed per hour of the day, one bucket for each of the 24 hours.

@weekly = (0) x 24;
@monthly = (0) x 24;
@annually = (0) x 24;
@overall = (0) x 24;

while (my $line = <$stats_fh>) {
	chomp $line;
	@stats = split (/\t/, $line);

	die "$0: must have exactly 11 entries in each row of the stats file.
This means you are using an obsolete data file and you should remove it.\n" if (scalar (@stats) != 11);

	# Only the hour of the day (local time) is needed for the load graphs.

	$hour = (localtime ($stats[0]))[2];

	my $age = $now - $stats[0];

	# These stats occurred within the last week

	$weekly[$hour] += $stats[6] if ($age <= $week_window);

	# These stats occurred within the last 30 days

	$monthly[$hour] += $stats[6] if ($age <= $month_window);

	# These stats occurred within the last year (365.25 days)

	$annually[$hour] += $stats[6] if ($age <= $year_window);

	$overall[$hour] += $stats[6];

	# Per-job series; element N of every array describes the same job.

	push (@ptime, $stats[1] + $stats[2]);
	push (@ctime, $stats[3] + $stats[4]);
	push (@filesize, $stats[5]);
	push (@pages, $stats[6]);
	push (@cyan, $stats[7]);
	push (@magenta, $stats[8]);
	push (@yellow, $stats[9]);
	push (@black, $stats[10]);
}

close ($stats_fh);


# Plot pages printed against hour of the day, once per averaging window

my %load_for = (
	"weekly" => \@weekly,
	"monthly" => \@monthly,
	"annually" => \@annually,
	"overall" => \@overall,
);

foreach my $window ("weekly", "monthly", "annually", "overall") {
	&plot ("$path/${printer}_$window.png", "Hour", "Pages Printed", "[0:24]", @{$load_for{$window}});
}

# Plot a frequency histogram for each per-job quantity that applies to
# every printer, whatever its colourspace

foreach my $series (
	["ptime", "Parent Processing Time", \@ptime],
	["ctime", "Child Processing Time", \@ctime],
	["filesize", "File Size", \@filesize],
	["pages", "Pages", \@pages],
) {
	my ($tag, $xlabel, $values) = @$series;

	@dist = &hist ($nslices, $fraction, @$values);
	&plot ("$path/${printer}_$tag.png", $xlabel, "Frequency", "[:]", @dist);
}

# Unless the printer is mono, plot histograms of the total coverage and of
# the average coverage per page for each of cyan, magenta and yellow. The
# per-page arrays filled in here are reused for the statistics report.

if ($colourspace ne "mono") {
	my @cmy_channels = (
		["cyan", "Cyan", \@cyan, \@cyan_coverage_per_page],
		["magenta", "Magenta", \@magenta, \@magenta_coverage_per_page],
		["yellow", "Yellow", \@yellow, \@yellow_coverage_per_page],
	);

	foreach my $channel (@cmy_channels) {
		my ($tag, $label, $totals) = @$channel;

		@dist = &hist ($nslices, $fraction, @$totals);
		&plot ("$path/${printer}_$tag.png", "Total $label \% Coverage", "Frequency", "[:]", @dist);
	}

	foreach my $channel (@cmy_channels) {
		my ($tag, $label, $totals, $per_page) = @$channel;

		# Cover every job, the last one included. A job that printed
		# no pages counts as zero coverage instead of dividing by zero.

		foreach my $job (0 .. $#pages) {
			$per_page->[$job] = $pages[$job] ? $totals->[$job] / $pages[$job] : 0;
		}

		@dist = &hist ($nslices, $fraction, @$per_page);
		&plot ("$path/${printer}_${tag}_per_page.png", "Average \% $label Coverage Per Page", "Frequency", "[:]", @dist);
	}
}

# Unless the printer is CMY-only, plot histograms of the total black
# coverage and of the average black coverage per page. The per-page array
# filled in here is reused for the statistics report.

if ($colourspace ne "cmy") {
	@dist = &hist ($nslices, $fraction, @black);
	&plot ("$path/$printer" . "_black.png", "Total Black \% Coverage", "Frequency", "[:]", @dist);

	# Cover every job, the last one included. A job that printed no
	# pages counts as zero coverage instead of dividing by zero.

	foreach my $job (0 .. $#pages) {
		$black_coverage_per_page[$job] = $pages[$job] ? $black[$job] / $pages[$job] : 0;
	}

	@dist = &hist ($nslices, $fraction, @black_coverage_per_page);
	&plot ("$path/$printer" . "_black_per_page.png", "Average \% Black Coverage Per Page", "Frequency", "[:]", @dist);
}

# Statistics report, written to stdout as HTML (--web) or as plain text.
#
# Each entry of @report_rows describes one statistic: the label used in the
# HTML table, the label used in the text report, the suffix of its PNG file
# name and the data it summarises. Building the rows once keeps the label,
# the link and the data of every row in step for both output formats.

my @report_rows = (
	{ web => "Parent Time", text => "Parent Time", png => "ptime", data => \@ptime },
	{ web => "Child Time", text => "Child Time", png => "ctime", data => \@ctime },
	{ web => "File Size", text => "File Size", png => "filesize", data => \@filesize },
	{ web => "Pages", text => "Pages", png => "pages", data => \@pages },
);

# Cyan, magenta and yellow rows are left out for mono printers

if ($colourspace ne "mono") {
	push (@report_rows,
		{ web => "Total Cyan", text => "Cyan", png => "cyan", data => \@cyan },
		{ web => "Total Magenta", text => "Magenta", png => "magenta", data => \@magenta },
		{ web => "Total Yellow", text => "Yellow", png => "yellow", data => \@yellow },
		{ web => "Cyan/Page", text => "Cyan Coverage Per Page", png => "cyan_per_page", data => \@cyan_coverage_per_page },
		{ web => "Magenta/Page", text => "Magenta Coverage Per Page", png => "magenta_per_page", data => \@magenta_coverage_per_page },
		{ web => "Yellow/Page", text => "Yellow Coverage Per Page", png => "yellow_per_page", data => \@yellow_coverage_per_page },
	);
}

# Black rows are left out for CMY-only printers

if ($colourspace ne "cmy") {
	push (@report_rows,
		{ web => "Total Black", text => "Black", png => "black", data => \@black },
		{ web => "Black/Page", text => "Black Coverage Per Page", png => "black_per_page", data => \@black_coverage_per_page },
	);
}

if ($opt{'web'}) {
	if (!defined ($png_url)) {
		print "<p>You must specify --png_url <url> if you specify --web\n";
		exit 0;
	}

	print "<h1>Detailed usage information for printer \"$printer\"</h1>\n";

	# First table: links to the hour-of-day load graphs

	print "<p><h3>Printing load over time</h3>";

	print "<table cellpadding=2 cellspacing=2 border=1>\n";
	print "<thead>\n";

	print "<td><b>Averaged Over</b></td>\n";
	print "<td><b>Graph</b></td>\n";
	print "</thead>\n<tbody>\n";

	foreach my $window (
		["Last 7 Days", "weekly"],
		["Last 30 Days", "monthly"],
		["Last 365.25 Days", "annually"],
		["Forever", "overall"],
	) {
		my ($label, $suffix) = @$window;
		my $png = "${printer}_$suffix.png";

		print "<tr>\n<td>$label</td>\n";
		print "<td><a href=\"$png_url/$png\">$png</a></td>\n";
	}

	print "</tbody></table>\n";

	# Second table: one row of summary statistics per quantity

	print "<p><h3>Statistics</h3>\n";

	print "<table cellpadding=2 cellspacing=2 border=1>\n";
	print "<thead>\n";

	foreach my $heading ("Parameter", "Graph", "Minimum", "Maximum", "Mean", "Standard Deviation", "Median") {
		print "<td><b>$heading</b></td>\n";
	}

	print "</thead>\n<tbody>\n";

	foreach my $row (@report_rows) {
		my @summary = &calcstats (@{$row->{data}});
		my $png = "${printer}_$row->{png}.png";

		# Link target and link text always name the same file

		print "<tr>\n<td>$row->{web}</td>\n";
		print "<td><a href=\"$png_url/$png\">$png</a></td>\n";
		&webprint (@summary);
	}

	print "</tbody></table><p>\n";
} else {
	print "Statistical parameters (minimum, maximum, mean, standard deviation and median)\n";
	print "==============================================================================\n";

	foreach my $row (@report_rows) {
		my @summary = &calcstats (@{$row->{data}});

		print "$row->{text}: @summary\n";
	}
}

# Build a frequency histogram of a list of numbers, ready for plotting.
#
#	$nslices	number of equal-width bins
#	$fraction	share of the grand total of the values to cover: the
#			sorted values are added up until the running sum
#			exceeds $fraction times the total, and the value
#			reached at that point is the upper edge of the
#			histogram (with 1 the largest value is the edge)
#	@data		the values to count
#
# Returns one "x<TAB>count" string per bin, where x is the centre of the bin.
# Values above the upper edge are not counted; the value on the upper edge
# itself goes into the last bin. Returns an empty list when there is nothing
# to draw: no data, fewer than one bin, or a zero range of values.

sub hist {
	my ($nslices, $fraction, @data) = @_;

	return () if ($nslices < 1 || !@data);

	my @sdata = sort {$a <=> $b} @data;
	my $min = $sdata[0];

	my $total = 0;

	foreach my $val (@sdata) {
		$total += $val;
	}

	# Walk up the sorted values until the requested share of the total
	# has been passed; the value reached becomes the upper edge.

	my $cutoff = $total * $fraction;
	my $cumulative = 0;
	my $max = $min;

	foreach my $val (@sdata) {
		$cumulative += $val;
		$max = $val;

		last if ($cumulative > $cutoff);
	}

	my $delta = $max - $min;

	return () if ($delta == 0);

	my @bins = (0) x $nslices;

	foreach my $val (@sdata) {
		# The data is sorted, so everything from here on is too large

		last if ($val > $max);

		my $idx = int ($nslices * (($val - $min) / $delta));

		# A value equal to $max lands one past the end; fold it into
		# the last bin.

		$idx = $nslices - 1 if ($idx >= $nslices);
		$bins[$idx]++;
	}

	my $step = $delta / $nslices;
	my $start = $min + $step / 2;
	my @graphdata;

	foreach my $bin (0 .. $#bins) {
		my $x = $start + $step * $bin;

		push (@graphdata, "$x\t$bins[$bin]");
	}

	return @graphdata;
}

# Draw one box graph by piping commands and data to gnuplot.
#
#	$filename	PNG file to write
#	$xlabel		label for the x axis
#	$ylabel		label for the y axis
#	$xrange		gnuplot range specification for the x axis, e.g. "[0:24]"
#	@graphdata	one data line per element, sent to gnuplot as inline data
#
# The gnuplot command is taken from $params{'gnuplot'}. Dies if no file name
# is given or if gnuplot cannot be started.

sub plot {
	my ($filename, $xlabel, $ylabel, $xrange, @graphdata) = @_;

	die "$0: plot called without an output file name\n" if ($filename eq "");

	open (my $gp, '|-', $params{'gnuplot'})
		or die "$0: unable to run $params{'gnuplot'}: $!\n";

	print $gp "set xlabel \"$xlabel\"\n";
	print $gp "set ylabel \"$ylabel\"\n";
	print $gp "set xrange $xrange\n";
	print $gp "set grid\n";
	print $gp "set nokey\n";
	print $gp "set term png colour\n";
	print $gp "set output \"$filename\"\n";
	print $gp "plot \"-\" with boxes\n";

	foreach my $point (@graphdata) {
		print $gp "$point\n";
	}

	# Closing the pipe ends the inline data and waits for gnuplot to
	# finish. Its exit status is not checked.

	close ($gp);
}

# Summarise a list of numbers.
#
# Returns (minimum, maximum, mean, standard deviation, median). The standard
# deviation is the population form, i.e. the square root of the mean squared
# distance from the mean. The median is the middle value of the sorted data,
# or the average of the two middle values when the count is even. An empty
# list gives five zeros rather than a division by zero.

sub calcstats {
	my @data = sort {$a <=> $b} @_;
	my $count = scalar (@data);

	return (0, 0, 0, 0, 0) if ($count == 0);

	my $sum = 0;

	foreach my $value (@data) {
		$sum += $value;
	}

	my $mean = $sum / $count;

	my $squares = 0;

	foreach my $value (@data) {
		my $diff = $value - $mean;

		$squares += $diff * $diff;
	}

	# Divide by the count before taking the root, not after

	my $stddev = sqrt ($squares / $count);

	my $mid = int ($count / 2);
	my $median = ($count % 2) ? $data[$mid] : ($data[$mid - 1] + $data[$mid]) / 2;

	return ($data[0], $data[$#data], $mean, $stddev, $median);
}

# Finish an HTML table row: write each argument to stdout as its own <td>
# cell, one per line, then close the row.

sub webprint {
	my @cells = map { "<td>$_</td>\n" } @_;

	print join ("", @cells), "</tr>\n";
}
