From Code to Community: Sponsoring The Perl and Raku Conference 2025 Learn more

#!/usr/bin/perl
# Copyright 2022 Jeffrey Kegler
# This file is part of Marpa::R2. Marpa::R2 is free software: you can
# redistribute it and/or modify it under the terms of the GNU Lesser
# General Public License as published by the Free Software Foundation,
# either version 3 of the License, or (at your option) any later version.
#
# Marpa::R2 is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser
# General Public License along with Marpa::R2. If not, see
use 5.010001;
use strict;
use English qw( -no_match_vars );
use List::Util qw(sum);
use Fatal qw(open close);
sub usage {
say {*STDERR} "$PROGRAM_NAME html_score [uri|file]" or die "say failed: $ERRNO";
exit 1;
}
my $html_flag = 0;
my $help_flag = 0;
my $dump_config_flag = 0;
my $dump_AHFA_flag = 0;
my $trace_terminals_flag = 0;
my $trace_cruft_flag = 0;
my $trace_values_flag = 0;
my $compile_flag;
usage() unless GetOptions(
'html' => \$html_flag,
'help' => \$help_flag,
# undocumented
'dump-config' => \$dump_config_flag,
'dump-AHFA' => \$dump_AHFA_flag,
'compile=s' => \$compile_flag,
'trace-terminals' => \$trace_terminals_flag,
'trace-cruft' => \$trace_cruft_flag,
'trace-values' => \$trace_values_flag,
);
usage() unless $help_flag or 1 >= scalar @ARGV;
my $locator = shift;
my $document;
GET_DOCUMENT: {
if ( not defined $locator ) {
$locator = 'STDIN'; # for possible display, later
local $RS = undef;
## no critic(InputOutput::ProhibitExplicitStdin)
$document = <STDIN>;
last GET_DOCUMENT;
} ## end if ( not defined $locator )
if ( $locator =~ /\A [[:alnum:]]+ [:] /xms ) {
require WWW::Mechanize;
my $mech = WWW::Mechanize->new( autocheck => 1 );
$mech->get($locator);
$document = $mech->content;
undef $mech;
last GET_DOCUMENT;
} ## end if ( $locator =~ /\A [[:alnum:]]+ [:] /xms )
{
local $RS = undef;
open my $fh, q{<}, $locator;
$document = <$fh>;
close $fh;
}
} ## end GET_DOCUMENT:
sub calculate_max_depths {
my ($descendant_data) = @_;
my %return_depths = ( ANY => 0 );
for my $child_value ( grep { ref $_ } map { $_->[0] } @{$descendant_data} ) {
my $depths = $child_value->{depths};
CHILD_TAGNAME: for my $child_tagname ( keys %{$depths} ) {
my $depth = $depths->{$child_tagname};
if ( $depth > ( $return_depths{$child_tagname} // 0 ) ) {
$return_depths{$child_tagname} = $depth;
}
if ( $depth > $return_depths{ANY} ) {
$return_depths{ANY} = $depth;
}
} ## end for my $child_tagname ( keys %{$depths} )
} ## end for my $child_value ( grep { ref $_ } map { $_->[0] }...)
return \%return_depths;
} ## end sub calculate_max_depths
sub calculate_length {
my ($descendant_data) = @_;
my $length = 0;
for my $descendant_data ( @{$descendant_data} ) {
my ( $value, $literal ) = @{$descendant_data};
my $this_length;
if (defined $value) {
$this_length = $value->{length};
} else {
(my $no_whitespace_literal = $literal) =~ s/\s//xmsg;
$this_length = length $no_whitespace_literal;
}
$length += $this_length;
} ## end for my $descendant_data ( @{$descendant_data} )
return $length;
} ## end sub calculate_length
my %flags = (
trace_terminals => $trace_terminals_flag,
trace_cruft => $trace_cruft_flag,
trace_values => $trace_values_flag,
dump_config => $dump_config_flag,
dump_AHFA => $dump_AHFA_flag,
);
if (defined $compile_flag) {
open my $fh, q{<}, $compile_flag;
my $source = join q{}, <$fh>;
close $fh;
$flags{compile} = \$source;
}
my ( $instance, $value ) = @{
Marpa::R2::HTML::html(
\$document,
{ ':COMMENT' => sub { return { depths => {}, length => 0 } },
q{*} => sub {
my $descendant_data =
Marpa::R2::HTML::descendants('value,literal');
my $tagname = Marpa::R2::HTML::tagname();
my $length = calculate_length($descendant_data);
$Marpa::R2::HTML::INSTANCE->{count}->{$tagname}++;
$Marpa::R2::HTML::INSTANCE->{length}->{$tagname} += $length;
my $return_depths = calculate_max_depths($descendant_data);
( $return_depths->{$tagname} //= 0 )++;
$return_depths->{ANY}++;
return {
depths => $return_depths,
length => $length,
};
},
':CRUFT' => sub {
my $descendant_data =
Marpa::R2::HTML::descendants('value,literal');
my $return_depths = { '[CRUFT]' => 1 };
my $length = calculate_length($descendant_data);
$Marpa::R2::HTML::INSTANCE->{count}->{'[CRUFT]'}++;
$Marpa::R2::HTML::INSTANCE->{length}->{'[CRUFT]'} += $length;
return {
depths => $return_depths,
length => $length,
};
},
':TOP' => sub {
my $descendant_data =
Marpa::R2::HTML::descendants('value,literal');
return [
$Marpa::R2::HTML::INSTANCE,
{ depths => calculate_max_depths($descendant_data),
length => calculate_length($descendant_data),
},
];
},
},
\\%flags
),
};
my $length_by_element = $instance->{length};
my $count_by_element = $instance->{count};
my $html_length = $length_by_element->{html};
my $total_lengths = List::Util::sum values %{ $length_by_element };
my $complexity = 0;
if ( $html_length >= 1 ) {
$complexity = sprintf "%.3f",
( $total_lengths / ( $html_length * log($html_length) ) );
}
my $max_depths = $value->{depths};
my $max_element_depth = $max_depths->{ANY};
delete $max_depths->{ANY};
if ($html_flag) {
print qq{<table cellpadding="3" border="1">}
. qq{<thead>\n}
. qq{<tr><th colspan="5">$locator</tr>\n}
. qq{<tr><th colspan="5">Complexity Score = $complexity</tr>\n}
. qq{<tr><th colspan="5">Maximum Depth = $max_element_depth</tr>\n}
. qq{<tr>}
. qq{<th>Element}
. qq{<th>Maximum<br>Nesting}
. qq{<th>Number of<br>Elements}
. qq{<th>Size in<br>Characters</th>}
. qq{<th>Average<br>Size</th>}
. qq{</tr>\n}
. qq{</thead>\n};
} else {
say $locator;
say "Complexity Score = ", $complexity;
say "Maximum Depth = ", $max_element_depth;
printf "%11s%11s%11s%11s%11s\n", q{}, 'Maximum ', 'Number of', 'Size in ', 'Average';
printf "%11s%11s%11s%11s%11s\n", 'Element ', 'Nesting ', 'Elements ', 'Characters', 'Size ';
}
for my $element ( sort keys %{$max_depths} ) {
my $count = $count_by_element->{$element};
my $size = $length_by_element->{$element};
my $average = $count ? int( $size / $count ) : q{-};
if ($html_flag) {
print join q{},
q{<tr>},
qq{<td>$element</td>},
q{<td align="right">}, $max_depths->{$element}, q{</td>},
qq{<td align="right">$count</td>},
qq{<td align="right">$size</td>},
qq{<td align="right">$average</td>},
"</tr>\n";
} else {
printf "%-11s%11d%11d%11d%11d\n", $element, $max_depths->{$element}, $count, $size, $average;
}
} ## end for my $element ( sort keys %{$max_depths} )
$html_flag and print qq{</table>\n};
exit 0;
# vim: set expandtab shiftwidth=4: