use strict;
use File::Slurp qw(read_file);
use JSON;
use Statistics::Lite qw(:all);
=head1 NAME
mojo_cape_submit_extend - Compute stats for mojo_cape_submit based on incoming JSONs
=head1 VERSION
# Print the program name and version string to STDOUT, then exit with
# status 255.
sub version {
print "mojo_cape_submit_extend v. 0.1.0\n";
exit 255;
mojo_cape_submit_extend [B<-Z>] [B<-m> <incoming JSON dir>]
Computes the stats for mojo_cape_submit based on the incoming JSON files.
=head2 -m <dir>
The incoming JSON dir.
Default: /malware/incoming-json/
=head2 -Z
Do not GZip+Base64 compress the results. By default the compressed form
is printed only when it is not longer than the plain JSON.
=head1 Generated JSON
The generated JSON is a standard LibreNMS style return. For more
- .changed_hashes[] :: An array of IDs whose hashes changed between
being checksummed prior to sending and after being received.
All stats generated are gauges, just displaying the value for the current time slot.
Totals hash...
- .totals.app_proto.$app_proto :: '.totals.app_proto' is a hash of totals for files
extracted using that app proto.
- .totals.hash_changed :: Total of hashes that changed.
- .totals.size_max :: Max submitted file size.
- .totals.size_mean :: Mean size of submitted files.
- .totals.size_median :: Median size of submitted files.
- .totals.size_min :: Min size of submitted files.
- .totals.size_mode :: Mode of the size of submitted files.
- .totals.size_stddev :: Standard deviation of the size of submitted files.
- .totals.size_sum :: Sum of the size of submitted files.
- .totals.slug_count :: Total number of submitted files.
- .totals.sub_count :: Total number of submitted files.
Slugs hash...
- .slugs.$slug :: A per slug hash that is the same as the totals hash,
but only for that specific slug. This only includes slugs seen
during the current time slot.
# Version of the return format; emitted as "version" in the output JSON.
my $extend_version = 1;
# Set when -v / --version is given.
my $version;
# Set when -h / --help is given.
my $help;
# How many seconds back from now to look for incoming JSON files.
my $rewind_by = 300;
# Set when -Z is given; when true the output is never GZip+Base64 compressed.
my $dont_compress = 0;
# Directory searched for incoming JSON files; overridden by -m <dir>.
my $incoming_json_dir = '/malware/incoming-json/';
# Command line switch => destination variable mapping.
'h' => \$help,
'help' => \$help,
'v' => \$version,
'version' => \$version,
'Z' => \$dont_compress,
'm=s' => \$incoming_json_dir,
# Handle --version and --help before doing any real work.
if ($version) {
if ($help) {
pod2usage( -exitval => 255, -verbose => 2, -output => \*STDOUT, );
} ## end if ($help)
# Work out the start of the current time slot: now minus $rewind_by seconds.
# NOTE(review): $t->epoch requires localtime to return an object (e.g. via
# Time::Piece) -- confirm the module is loaded elsewhere in the file.
my $t = localtime;
my $target_time = $t->epoch - $rewind_by;
# Find files named <digits>.json under the incoming dir whose ctime is at or
# after the start of the time slot.
my @files = File::Find::Rule->file()->name(qr/^[0-9]+\.json$/)->ctime( '>=' . $target_time )->in($incoming_json_dir);
# Stats structure that is returned under the "data" key of the output JSON.
# Every gauge starts at zero; the values are filled in further down once the
# incoming files have been read.
my $data = {
totals => {
hash_changed => 0,
size_min => 0,
size_mean => 0,
size_median => 0,
size_mode => 0,
size_max => 0,
size_stddev => 0,
size_sum => 0,
sub_count => 0,
app_proto => {},
app_protos => 0,
slugs => {},
changed_hashes => [],
# Every submitted file size seen, used for the totals size stats.
my @all_sizes;
# Hash ref mapping slug => array ref of the sizes seen for that slug.
my $slug_sizes;
# Accumulated per-file error messages and the error flag, both of which are
# returned in the output JSON.
my $errorString = '';
my $error = 0;
# Process each incoming JSON file, updating the totals and the per slug stats.
# Each file is handled inside an eval so that a failure on one file is recorded
# in $errorString / $error instead of aborting the whole run.
foreach my $file (@files) {
my $slug;
my $hash_changed = 0;
# File name with the leading directory removed; used in changed_hashes and
# in error messages.
my $short_file = $file;
$short_file =~ s/.*\///;
eval {
my $json = decode_json( read_file($file) );
# Record the size for the totals stats.
push( @all_sizes, $json->{cape_submit}{size} );
$slug = $json->{suricata_extract_submit}{slug};
# First time this slug is seen during this run, so create its zeroed stats
# hash and its size list.
if ( !defined( $data->{slugs}{$slug} ) ) {
$data->{slugs}{$slug} = {
hash_changed => 0,
size_min => 0,
size_mean => 0,
size_median => 0,
size_mode => 0,
size_max => 0,
size_stddev => 0,
size_sum => 0,
sub_count => 0,
app_proto => {},
app_protos => 0,
$slug_sizes->{$slug} = [];
} ## end if ( !defined( $data->{slugs}{$slug} ) )
# Record the size for the per slug stats.
push( @{ $slug_sizes->{$slug} }, $json->{cape_submit}{size} );
# The checksum taken before sending differs from the one taken after
# receiving, so flag this file as changed.
if ( $json->{cape_submit}{sha256} ne $json->{suricata_extract_submit}{sha256} ) {
$hash_changed = 1;
push( @{ $data->{changed_hashes} }, $short_file );
if ( defined( $data->{slugs}{$slug} ) ) {
$data->{slugs}{$slug}{hash_changed} += $hash_changed;
$data->{totals}{hash_changed} += $hash_changed;
# NOTE(review): app_proto is read from the top level of the JSON rather than
# from one of the nested hashes used above -- confirm that is where it lives.
if ( defined( $json->{app_proto} ) ) {
my $app_proto = $json->{app_proto};
# Make sure the per app proto counters exist before they are used.
# NOTE(review): no increment of these counters, nor of sub_count, is visible
# in this section -- confirm that it happens.
if ( !defined( $data->{totals}{app_proto}{$app_proto} ) ) {
$data->{totals}{app_proto}{$app_proto} = 0;
if ( !defined( $data->{slugs}{$slug}{app_proto}{$app_proto} ) ) {
$data->{slugs}{$slug}{app_proto}{$app_proto} = 0;
} ## end if ( defined( $json->{app_proto} ) )
# Something died while reading or processing this file; note which file and
# why, and flag the run as having had an error.
if ($@) {
$errorString = $errorString . $short_file . ': ' . $@;
$error = 1;
} ## end foreach my $file (@files)
# Only compute the size stats if we have values, otherwise they will end up
# as null, which will make the display very suboptimal for LibreNMS. When
# there are no values the zero defaults set at initialisation are kept.
if ( defined( $all_sizes[0] ) ) {

    # Totals are computed across every submitted file.
    $data->{totals}{size_min}    = min(@all_sizes);
    $data->{totals}{size_max}    = max(@all_sizes);
    $data->{totals}{size_mean}   = mean(@all_sizes);
    $data->{totals}{size_median} = median(@all_sizes);
    $data->{totals}{size_mode}   = mode(@all_sizes);
    $data->{totals}{size_stddev} = stddev(@all_sizes);
    $data->{totals}{size_sum}    = sum(@all_sizes);

    # Per slug stats are computed from that slug's own sizes, not from the
    # global list, so that each .slugs.$slug hash only reflects its slug.
    foreach my $item ( keys( %{$slug_sizes} ) ) {
        my @sizes = @{ $slug_sizes->{$item} };

        # Keep the zero defaults rather than storing null for an empty list.
        next if !@sizes;

        $data->{slugs}{$item}{size_min}    = min(@sizes);
        $data->{slugs}{$item}{size_max}    = max(@sizes);
        $data->{slugs}{$item}{size_mean}   = mean(@sizes);
        $data->{slugs}{$item}{size_median} = median(@sizes);
        $data->{slugs}{$item}{size_mode}   = mode(@sizes);
        $data->{slugs}{$item}{size_stddev} = stddev(@sizes);
        $data->{slugs}{$item}{size_sum}    = sum(@sizes);
    } ## end foreach my $item ( keys( %{$slug_sizes} ) )
} ## end if ( defined( $all_sizes[0] ) )
# Wrap the stats together with the format version and the error state, then
# serialize the result to JSON.
my $json_string = encode_json(
data => $data,
version => $extend_version,
error => $error,
errorString => $errorString,
# Unless -Z was given, try the compressed form and print whichever is shorter.
if ( !$dont_compress ) {
# gzip the JSON and encode the result in base64
# base64 is needed as the gzip output is binary
# (presumably SNMP does not handle raw binary well -- TODO confirm)
my $compressed = encode_base64( gzip($json_string) );
# Strip the newlines so the result is a single line, then add back one
# trailing newline.
$compressed =~ s/\n//g;
$compressed = $compressed . "\n";
# check which is smaller and print it; the plain JSON is only used when the
# compressed form is strictly longer
if ( length($compressed) > length($json_string) ) {
print $json_string. "\n";
} else {
print $compressed;
} else {
# Compression was disabled via -Z, so always print the plain JSON.
print $json_string. "\n";