#
#    AcctReport.pm - Module that compiles accounting reports from the data dumped
#		 by the fwctl dump-acct command.
#
#    This file is part of Fwctl.
#
#    Author: Francis J. Lacoste <francis.lacoste@iNsu.COM>
#
#    Copyright (C) 2000 iNsu Innovations Inc.
#
#    This program is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 2 of the License, or
#    (at your option) any later version.
#
#    This program is free software; you can redistribute it and/or modify
#    it under the same terms as Perl itself.
#
package Fwctl::AcctReport;

use strict;

use vars qw( $VERSION @EXPORT @EXPORT_OK %EXPORT_TAGS @ISA );

use Math::BigInt;
use Symbol;
use Fwctl::Report;
use Exporter;

BEGIN {
    # Extract the bare revision number (e.g. "1.4") from the RCS keyword.
    # Only the number itself is captured: in list context the match
    # returns its capture groups in order, so an enclosing group would
    # put "Revision: 1.4" into $VERSION instead of "1.4".
    ($VERSION) = '$Revision: 1.4 $' =~ /Revision: ([\d.]+)/;
    @ISA = qw( Exporter );

    @EXPORT = ();

    @EXPORT_OK = ();

    # The position of a name in this list is the index of the matching
    # field inside a record array.
    %EXPORT_TAGS = ( fields => [ qw(  TIME PACKETS BYTES NAME ) ],
                   );
}

BEGIN {
    # Create one constant per record field, numbered from 0 in the order
    # of the 'fields' export tag.  constant->import() is called directly
    # (rather than through a string eval) so that a failure to create a
    # constant is fatal here instead of being silently ignored.
    require constant;

    my $i = 0;
    for my $f ( @{$EXPORT_TAGS{fields}} ) {
        constant->import( $f => $i );
        $i++;
    }

    # Make every field constant importable on request.
    Exporter::export_ok_tags( 'fields' );
};

=pod

=head1 NAME

Fwctl::AcctReport - Generates reports from C<fwctl dump-acct> command.

=head1 SYNOPSIS

    use Fwctl::AcctReport;

    my $report = new Fwctl::AcctReport( options ... );

    my $r = $report->summary_report;

    foreach my $chain ( @$r ) {
	print $chain->{name}, " = ", $chain->{bytes_sum}, "\n";
    }

=head1 DESCRIPTION

The Fwctl::AcctReport(3) module can be used to generate packet and bandwidth
reports from the data generated by the C<fwctl dump-acct> command.

This module can be used as backend for two kinds of report. Summary which
compiles the total number of packets and bytes received by each accounting
chains over the report's period or histogram type report where you have the
packets and bytes total for each sample (hour, day, etc) over the report's
period.

=head1 CAVEATS

The C<fwctl dump-acct> command dumps the packet and byte totals
since the time of the last invocation. If the last invocation wasn't
wholly comprised in the current sample, the total will be
interpolated. This means that you should run the C<fwctl dump-acct>
command at least as frequently as the smallest sample period you want
to use.

=cut

# Reads the accounting records from every source listed in
# $self->{opts}{files} and stores the selected ones, sorted by time, in
# $self->{records}.
#
# Each source is either a file handle (any reference), the string "-"
# (STDIN) or a file name.  STDIN is used when no source was given.
# Files that can't be opened are reported with a warning and skipped.
#
# A record is kept when its timestamp falls within the report's period
# widened by two samples on each side and, if the names option is set,
# when its chain name is equal to one of the requested names.
#
# When no start was configured, the timestamp of the first record read
# becomes the report's start (and start + period becomes its end when a
# period was configured).
#
# Returns the array reference stored in $self->{records}.
sub read_records {
    my $self = shift;

    # Read in the data
    my $records = [];

    # Default to STDIN when the files option is missing or empty.
    # The option is initialised first because dereferencing an undefined
    # value in rvalue context is fatal under strict refs.
    my $files = $self->{opts}{files} ||= [];
    push @$files, \*STDIN unless @$files;

  FILE:
    foreach my $file ( @$files ) {
        my $fh;
        my $opened_here = 0;
        if ( ref $file ) {
            $fh = $file;
        } elsif ( $file eq "-" ) {
            $fh = \*STDIN;
        } else {
            $fh = gensym;
            # Three-argument open: the name is always treated as a plain
            # file to read, never as a mode prefix or a piped command.
            open $fh, '<', $file
              or do { warn "can't open file $file: $!\n"; next FILE };
            $opened_here = 1;
        }

        # We give ourselves more records than the period strictly needs,
        # since each entry holds the amount counted between its own
        # timestamp and the timestamp of the record before it.
        my $start = $self->{start};
        $start -= 2 * $self->{sample} if defined $start;

        my $end = $self->{end};
        $end += 2 * $self->{sample} if defined $end;

      LINE:
        while ( my $line = <$fh> ) {
            chomp $line;
            my @fields = (split /\s+/, $line)[0,2,3,4];

            # Skip blank or truncated lines: the last selected column is
            # undefined when the line has fewer columns than a record
            # needs, and such a line would otherwise be stored as a
            # record with missing fields.
            next LINE unless @fields && defined $fields[-1];

            if ( ! defined $self->{start} ) {
                # No configured start: the report starts with the first
                # record.
                $start = $self->{start} = $fields[TIME];
                $start -= 2 * $self->{sample};
                if ( defined $self->{period} ) {
                    $end = $self->{end} = $self->{start} + $self->{period};
                    $end += 2 * $self->{sample};
                }
            }
            # Skip records outside the (widened) period
            next LINE unless $start <= $fields[TIME] && $end >= $fields[TIME];

            if ( $self->{opts}{names} ) {
                # Keep the record only if its chain was asked for.
                foreach my $chain ( @{ $self->{opts}{names} } ) {
                    if ( $chain eq $fields[NAME] ) {
                        push @$records, \@fields;
                        next LINE;
                    }
                }
            } else {
                push @$records, \@fields;
            }
        }

        # Only close what was opened here; handles supplied by the caller
        # (and STDIN) are left alone.
        close $fh if $opened_here;
    }

    # Records are sorted by time.
    $self->{records} = [ sort { $a->[TIME] <=> $b->[TIME] } @$records ];
}

=pod

=head1 CREATING A NEW REPORT OBJECT

    Ex. my $report = new Fwctl::AcctReport( start  => 'yesterday',
					    period => '1 day',
					    sample => '1d',
					    files  => [ 'log' ] );

=head2 PARAMETERS

The C<new> method accepts the following parameter :

=over

=item files

Specifies the files from which to read the dumped accounting information.
It is an array of file handles or file names. If this parameter is not specified
the records will be read from STDIN.

=item start

Sets the start of the report's period. If the Date::Manip(3) module is
installed, you can use any format that this module can parse. If that module
isn't installed you must use the following format YYYY-MM-DD HH:MM:SS or any
meaningful subset of that format.

If this option is not used, the report will start with the first record.

=item end

Sets the end of the report's period. If the Date::Manip(3) module is
installed, you can use any format that this module can parse. If that module
isn't installed you must use the following format YYYY-MM-DD HH:MM:SS or any
meaningful subset of that format.

If this option is not used, the report will end with the last record.

=item period

Sets the length of the report's period. This length is interpreted relative
to the report's start. This option has priority over the B<end> option.

If you have the Date::Manip module installed, you can use any format that this
module can parse. If that module isn't available, you can use a subset of the
following format X weeks X days X hours X mins X secs.

=item sample

Sets the length of the samples for the report. The sample_report will
return results aggregated over that length. For example, if you use
1d as the sample length, you will get the stats for every day from the
beginning of the report until the end.

If you have the Date::Manip module installed, you can use any format
that this module can parse. If that module isn't available, you can
use a subset of the following format X weeks X days X hours X mins X
secs.

=item names

Restrict records to those whose chain name is equal to one of B<names>.

This parameter is an array reference of chain names. A record is
included if its name is equal to any of them.

=back

=cut

# Constructor.  Takes the report options as a key/value list, resolves
# the report's start, end, period and sample length from them, then
# reads the records.  Dies with an explanatory message when a date or a
# period can't be parsed.
sub new {
    my ( $invocant, @args ) = @_;
    my $class = ref( $invocant ) || $invocant;

    my $self = {
        opts           => { @args },
        records        => undef,
        start          => undef,
        end            => undef,
        period         => undef,
        sample         => 3600,   # defaults to one-hour samples
        sample_report  => undef,
        summary_report => undef,
    };
    my $opts = $self->{opts};

    # Start of the report, when one was asked for.
    if ( $opts->{start} ) {
        $self->{start} = Fwctl::Report::parse_date( $opts->{start} );
        die "invalid start date format: $self->{opts}{start}\n"
          unless $self->{start};
    }

    # End of the report: a period wins over an explicit end, and the
    # current time is used when neither is given.
    if ( $opts->{period} ) {
        $self->{period} = Fwctl::Report::parse_period( $opts->{period} );
        die "invalid period delta: $self->{opts}{period}\n"
          unless $self->{period};
        # Without a start the end stays undefined for now.
        $self->{end} = $self->{start} + $self->{period}
          if $self->{start};
    }
    elsif ( $opts->{end} ) {
        $self->{end} = Fwctl::Report::parse_date( $opts->{end} );
        die "invalid end date format: $self->{opts}{end}\n"
          unless $self->{end};
    }
    else {
        $self->{end} = time;
    }

    # Sample length override.
    if ( $opts->{sample} ) {
        $self->{sample} = Fwctl::Report::parse_period( $opts->{sample} );
        die "invalid sample period: $self->{opts}{sample}\n"
          unless $self->{sample};
    }

    my $report = bless $self, $class;
    $report->read_records;

    return $report;
}

=pod

=head1 METHODS

=head1 start()

Return the start of the report in seconds since epoch.

=cut

# Accessor for the report's start time.
sub start {
    my ( $self ) = @_;
    return $self->{start};
}

=pod

=head1 end()

Returns the end of the report in seconds since epoch.

=cut

# Accessor for the report's end time.
sub end {
    my ( $self ) = @_;
    return $self->{end};
}

=pod

=head1 period()

Returns the length of the report's period ( $report->end() - $report->start() )

=cut

# Returns the length of the report's period, that is the report's end
# minus its start.  It is computed from the two bounds rather than read
# from the period option, so it is also right when the report was
# configured with an explicit end or with no end at all.
sub period {
    my ( $self ) = @_;
    return $self->{end} - $self->{start};
}

=pod

=head1 sample()

Returns the length of the sample period.

=cut

# Accessor for the sample length.
sub sample {
    my ( $self ) = @_;
    return $self->{sample};
}

=head1 records()

Returns an array reference to all the records read and which makes the
report's samples.

=head2 RECORD FIELDS

Each record is an array ref. You can access the individual fields of
the record by using the following constants. (Those can be imported by
using the C<:fields> import tag.)

=over

=item TIME

The epoch start time of the sample.

=item NAME

The name of the accounting chain.

=item PACKETS

The number of packets received.

=item BYTES

The total number of bytes.

=back

=cut

# Returns a new array reference holding the report's records.  Only the
# outer array is copied; the record array refs themselves are shared.
sub records {
    my ( $self ) = @_;
    my @copy = @{ $self->{records} };
    return \@copy;
}



=pod

=head1 REPORTS

The following report generation methods are available :

=head2 summary_report()

    my $r = $report->summary_report();


Generates a report that shows the compiled statistics for every
accounting chain.

The resulting report is an array ref of hash references. Each report
record has the following fields.

=over

=item name

The name of the chains.

=item packets_sum

The total number of packets over the report's period. 

This number is a Math::BigInt(3) object.

=item bytes_sum

The total number of bytes received over the report's period. 

This number is a Math::BigInt(3) object.

=item packets_avg

The average number of packets received by that chain by sample. 

This number is a Math::BigInt(3) object.

=item bytes_avg

The average number of bytes received by that chain by sample.

This number is a Math::BigInt(3) object.

=back

=cut

# Compiles, for every chain of the sample report, the packet and byte
# totals and their per-sample averages.  The result is an array ref of
# hash refs sorted by chain name; it is cached in the object and the
# cached value is returned on later calls.
sub summary_report {
    my ( $self ) = @_;

    # Serve the cached report when it was already compiled.
    my $cached = $self->{summary_report};
    return $cached if defined $cached;

    my $by_chain = $self->sample_report;

    my @summary;
    foreach my $name ( keys %$by_chain ) {
        my $samples = $by_chain->{$name};

        # Totals over all the samples of that chain.
        my $packets = Math::BigInt->new( 0 );
        my $bytes   = Math::BigInt->new( 0 );
        foreach my $s ( @$samples ) {
            $packets += $s->{packets};
            $bytes   += $s->{bytes};
        }

        # Averages are taken over the number of samples.
        my $count = @$samples;
        push @summary, {
            name        => $name,
            packets_sum => $packets,
            bytes_sum   => $bytes,
            packets_avg => $packets / $count,
            bytes_avg   => $bytes / $count,
        };
    }

    # Cache and return the stats ordered by chain name.
    my @sorted = sort { $a->{name} cmp $b->{name} } @summary;
    return $self->{summary_report} = \@sorted;
}

=pod

=head2 sample_report()

    my $r = $report->sample_report();


Generates a report that compiles the stats for each sample of each chain.

The resulting report is a hash reference. Each key is a chain and each value
is an array ref to the chain's compiled statistics. Each element of the array is
a hash reference made of the following elements :

=over

=item start

The start time in seconds since epoch for that sample.

=item packets

The total number of packets received in that sample.

This number is a Math::BigInt(3) object.

=item bytes

The total number of bytes received in that sample.

This number is a Math::BigInt(3) object.

=back

=cut

# Compiles the packet and byte counters of every chain by sample.
#
# Returns a hash ref keyed by chain name.  Each value is an array ref
# with one hash ref per sample, holding the sample's start time and its
# packets and bytes totals (Math::BigInt objects).  The result is cached
# in the object and the cached value is returned on later calls.
#
# A record that falls in the same sample as the previous record of its
# chain is added whole to that sample.  Otherwise its counters are split
# pro rata between the sample it falls in and the previous one,
# according to how far into its sample the record's timestamp is.
sub sample_report {
    my ( $self ) = shift;

    # Return cache
    return $self->{sample_report} if defined $self->{sample_report};

    my $start  = $self->{start};
    my $end    = $self->{end};
    my $sample = $self->{sample};
    my $num_samples = int( ($end - $start ) / $sample );
    $num_samples++ unless $num_samples * $sample + $start > $end;

    my %chains = ();
    # Sort records by chain, since records are ordered by time,
    # all chain's record are also ordered.
    foreach my $r ( @{ $self->{records} } ) {
        push @{$chains{$r->[NAME]}}, $r;
    }

    # Foreach chains build an array containing the number of bytes and packets
    # by sample period.
    foreach my $chain ( keys %chains ) {
        my $records = $chains{$chain};

        # Initialize the samples array for that chain
        my @samples;
        for ( my $i = 0 ; $i < $num_samples; ++$i ) {
            push @samples, {
                            start   => $start + $i * $sample,
                            packets => Math::BigInt->new(0),
                            bytes   => Math::BigInt->new(0),
                           };
        }

        # Index of the sample of the previous record, -1 while no record
        # of the period was seen.
        my $last_s_idx = -1;
      RECORD:
        foreach my $r ( @$records ) {
            # Records dated before the beginning of the period don't
            # belong to any sample.  The timestamps are compared
            # directly: int() truncates toward zero, so a sample index
            # computed for a record less than one sample before the
            # start would be 0 instead of negative.
            next RECORD if $r->[TIME] < $start;

            # Find sample in which this record falls
            my $s_idx = int ( ( $r->[TIME] - $start ) / $sample );

            # Comparing to the last record, is it completely
            # in this period.
            if ( $last_s_idx == $s_idx ) {
                # Add bytes and packet counters
                $samples[$s_idx]{bytes}   += $r->[BYTES];
                $samples[$s_idx]{packets} += $r->[PACKETS];
                next RECORD;
            }

            # Distribute pro rata
            my $this_sample_ratio = ($r->[TIME] - ($start + $s_idx * $sample) ) / $sample;
            my $last_sample_ratio = 1 - $this_sample_ratio;

            # Sample that receives the part of the record that precedes
            # its own sample: the sample of the previous record or, when
            # there was none, the sample just before this one, limited
            # to the last sample of the report.  Index -1 is never used
            # as is, since it would designate the last element of the
            # array.
            my $prev_idx = $last_s_idx >= 0 ? $last_s_idx : $s_idx - 1;
            $prev_idx = $num_samples - 1 if $prev_idx >= $num_samples;

            if ( $s_idx >= $num_samples ) {
                # This is the last record: it is dated after the last
                # sample, only its first part can belong to the report.
                if ( $prev_idx >= 0 ) {
                    $samples[$prev_idx]{bytes}   += int( $r->[BYTES]   * $last_sample_ratio );
                    $samples[$prev_idx]{packets} += int( $r->[PACKETS] * $last_sample_ratio );
                }
                last RECORD;
            }

            # A record of the first sample has no previous sample to
            # share its counters with.
            if ( $s_idx > 0 ) {
                $samples[$prev_idx]{bytes}   += int( $r->[BYTES]   * $last_sample_ratio );
                $samples[$prev_idx]{packets} += int( $r->[PACKETS] * $last_sample_ratio );
            }
            $samples[$s_idx]{bytes}   += int( $r->[BYTES]   * $this_sample_ratio );
            $samples[$s_idx]{packets} += int( $r->[PACKETS] * $this_sample_ratio );

            $last_s_idx = $s_idx;
        }
        # Replace records by sample stats
        $chains{$chain} = \@samples;
    }

    # Return an hash of array of hash.
    return $self->{sample_report} = \%chains;
}

1;

__END__

=pod

=head1 AUTHOR

Francis J. Lacoste <francis.lacoste@iNsu.COM>

=head1 COPYRIGHT

Copyright (c) 2000 iNsu Innovations Inc.
All rights reserved.

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

=head1 SEE ALSO

Fwctl(3) Fwctl::RuleSet(3) fwctl(8) fwctllog(8) fwctacctreport(8)
Fwctl::Report(3) Date::Manip(3).

=cut