#!/usr/bin/perl
use
5.010001;
use
Fatal
qw(open close)
;
sub
usage {
say
{
*STDERR
}
"$PROGRAM_NAME html_score [uri|file]"
or
die
"say failed: $ERRNO"
;
exit
1;
}
my
$html_flag
= 0;
my
$help_flag
= 0;
my
$dump_config_flag
= 0;
my
$dump_AHFA_flag
= 0;
my
$trace_terminals_flag
= 0;
my
$trace_cruft_flag
= 0;
my
$trace_values_flag
= 0;
my
$compile_flag
;
usage()
unless
GetOptions(
'html'
=> \
$html_flag
,
'help'
=> \
$help_flag
,
'dump-config'
=> \
$dump_config_flag
,
'dump-AHFA'
=> \
$dump_AHFA_flag
,
'compile=s'
=> \
$compile_flag
,
'trace-terminals'
=> \
$trace_terminals_flag
,
'trace-cruft'
=> \
$trace_cruft_flag
,
'trace-values'
=> \
$trace_values_flag
,
);
usage()
unless
$help_flag
or 1 >=
scalar
@ARGV
;
my
$locator
=
shift
;
my
$document
;
GET_DOCUMENT: {
if
( not
defined
$locator
) {
$locator
=
'STDIN'
;
local
$RS
=
undef
;
$document
= <STDIN>;
last
GET_DOCUMENT;
}
if
(
$locator
=~ /\A [[:alnum:]]+ [:] /xms ) {
my
$mech
= WWW::Mechanize->new(
autocheck
=> 1 );
$mech
->get(
$locator
);
$document
=
$mech
->content;
undef
$mech
;
last
GET_DOCUMENT;
}
{
local
$RS
=
undef
;
open
my
$fh
,
q{<}
,
$locator
;
$document
= <
$fh
>;
close
$fh
;
}
}
sub
calculate_max_depths {
my
(
$descendant_data
) =
@_
;
my
%return_depths
= (
ANY
=> 0 );
for
my
$child_value
(
grep
{
ref
$_
}
map
{
$_
->[0] } @{
$descendant_data
} ) {
my
$depths
=
$child_value
->{depths};
CHILD_TAGNAME:
for
my
$child_tagname
(
keys
%{
$depths
} ) {
my
$depth
=
$depths
->{
$child_tagname
};
if
(
$depth
> (
$return_depths
{
$child_tagname
} // 0 ) ) {
$return_depths
{
$child_tagname
} =
$depth
;
}
if
(
$depth
>
$return_depths
{ANY} ) {
$return_depths
{ANY} =
$depth
;
}
}
}
return
\
%return_depths
;
}
sub
calculate_length {
my
(
$descendant_data
) =
@_
;
my
$length
= 0;
for
my
$descendant_data
( @{
$descendant_data
} ) {
my
(
$value
,
$literal
) = @{
$descendant_data
};
my
$this_length
;
if
(
defined
$value
) {
$this_length
=
$value
->{
length
};
}
else
{
(
my
$no_whitespace_literal
=
$literal
) =~ s/\s//xmsg;
$this_length
=
length
$no_whitespace_literal
;
}
$length
+=
$this_length
;
}
return
$length
;
}
my
%flags
= (
trace_terminals
=>
$trace_terminals_flag
,
trace_cruft
=>
$trace_cruft_flag
,
trace_values
=>
$trace_values_flag
,
dump_config
=>
$dump_config_flag
,
dump_AHFA
=>
$dump_AHFA_flag
,
);
if
(
defined
$compile_flag
) {
open
my
$fh
,
q{<}
,
$compile_flag
;
my
$source
=
join
q{}
, <
$fh
>;
close
$fh
;
$flags
{compile} = \
$source
;
}
my
(
$instance
,
$value
) = @{
Marpa::R2::HTML::html(
\
$document
,
{
':COMMENT'
=>
sub
{
return
{
depths
=> {},
length
=> 0 } },
q{*}
=>
sub
{
my
$descendant_data
=
Marpa::R2::HTML::descendants(
'value,literal'
);
my
$tagname
= Marpa::R2::HTML::tagname();
my
$length
= calculate_length(
$descendant_data
);
$Marpa::R2::HTML::INSTANCE
->{count}->{
$tagname
}++;
$Marpa::R2::HTML::INSTANCE
->{
length
}->{
$tagname
} +=
$length
;
my
$return_depths
= calculate_max_depths(
$descendant_data
);
(
$return_depths
->{
$tagname
} //= 0 )++;
$return_depths
->{ANY}++;
return
{
depths
=>
$return_depths
,
length
=>
$length
,
};
},
':CRUFT'
=>
sub
{
my
$descendant_data
=
Marpa::R2::HTML::descendants(
'value,literal'
);
my
$return_depths
= {
'[CRUFT]'
=> 1 };
my
$length
= calculate_length(
$descendant_data
);
$Marpa::R2::HTML::INSTANCE
->{count}->{
'[CRUFT]'
}++;
$Marpa::R2::HTML::INSTANCE
->{
length
}->{
'[CRUFT]'
} +=
$length
;
return
{
depths
=>
$return_depths
,
length
=>
$length
,
};
},
':TOP'
=>
sub
{
my
$descendant_data
=
Marpa::R2::HTML::descendants(
'value,literal'
);
return
[
$Marpa::R2::HTML::INSTANCE
,
{
depths
=> calculate_max_depths(
$descendant_data
),
length
=> calculate_length(
$descendant_data
),
},
];
},
},
\\
%flags
),
};
my
$length_by_element
=
$instance
->{
length
};
my
$count_by_element
=
$instance
->{count};
my
$html_length
=
$length_by_element
->{html};
my
$total_lengths
= List::Util::sum
values
%{
$length_by_element
};
my
$complexity
= 0;
if
(
$html_length
>= 1 ) {
$complexity
=
sprintf
"%.3f"
,
(
$total_lengths
/ (
$html_length
*
log
(
$html_length
) ) );
}
my
$max_depths
=
$value
->{depths};
my
$max_element_depth
=
$max_depths
->{ANY};
delete
$max_depths
->{ANY};
if
(
$html_flag
) {
print
qq{<table cellpadding="3" border="1">}
.
qq{<thead>\n}
.
qq{<tr><th colspan="5">$locator</tr>\n}
.
qq{<tr><th colspan="5">Complexity Score = $complexity</tr>\n}
.
qq{<tr><th colspan="5">Maximum Depth = $max_element_depth</tr>\n}
.
qq{<tr>}
.
qq{<th>Element}
.
qq{<th>Maximum<br>Nesting}
.
qq{<th>Number of<br>Elements}
.
qq{<th>Size in<br>Characters</th>}
.
qq{<th>Average<br>Size</th>}
.
qq{</tr>\n}
.
qq{</thead>\n}
;
}
else
{
say
$locator
;
say
"Complexity Score = "
,
$complexity
;
say
"Maximum Depth = "
,
$max_element_depth
;
printf
"%11s%11s%11s%11s%11s\n"
,
q{}
,
'Maximum '
,
'Number of'
,
'Size in '
,
'Average'
;
printf
"%11s%11s%11s%11s%11s\n"
,
'Element '
,
'Nesting '
,
'Elements '
,
'Characters'
,
'Size '
;
}
for
my
$element
(
sort
keys
%{
$max_depths
} ) {
my
$count
=
$count_by_element
->{
$element
};
my
$size
=
$length_by_element
->{
$element
};
my
$average
=
$count
?
int
(
$size
/
$count
) :
q{-}
;
if
(
$html_flag
) {
print
join
q{}
,
q{<tr>}
,
qq{<td>$element</td>}
,
q{<td align="right">}
,
$max_depths
->{
$element
},
q{</td>}
,
qq{<td align="right">$count</td>}
,
qq{<td align="right">$size</td>}
,
qq{<td align="right">$average</td>}
,
"</tr>\n"
;
}
else
{
printf
"%-11s%11d%11d%11d%11d\n"
,
$element
,
$max_depths
->{
$element
},
$count
,
$size
,
$average
;
}
}
$html_flag
and
print
qq{</table>\n}
;
exit
0;