#!/usr/local/bin/perl
# htmlclean
# Copyright (C) 1998 by ITU
use
strict;
# Print a short usage summary (script name plus expected file arguments)
# to STDERR and terminate the program with exit status 1.
# Takes no arguments and never returns.
sub usage {
    print STDERR <<END;
usage: $0 file1 [file2 file3 ...]
END
    exit 1;
}
# Print the script name followed by the value of $HTML::Clean::VERSION
# to the currently selected output handle, then terminate the program
# with exit status 0.  Takes no arguments and never returns.
sub print_version {
    my ($version) = $HTML::Clean::VERSION;

    print "$0\n\tHTML::Clean::VERSION: $version\n\n";
    exit 0;
}
=head1 NAME
htmlclean - a small script to clean up existing HTML
=head1 SYNOPSIS
B<htmlclean [-v] [-V] file1 [file2 file3 ...]>
=head1 DESCRIPTION
This program provides a command-line interface to the HTML::Clean
module, which can help you to provide more compatible, smaller HTML
files at the expense of reducing the human readability of the HTML
code. In some cases you may be able to reduce the size of your HTML
by up to 50%!
The HTML::Clean library provides a number of features that improve your
HTML for browsing and serving:
B<htmlclean> passes each file given on the command line to the library
and writes out the new HTML according to the specified options. The
default is to create a backup file and replace the file with cleaned HTML.
=over 6
=item Removing whitespace, Comments and other useless or redundant constructs
=item Ensuring that font tags work across multiple operating systems
=back
For full details see the documentation for L<HTML::Clean> itself.
=head1 OPTIONS
=over 4
=item C<-V>
Print the version of the program.
=item C<-v>
Verbose mode. Print out the original and final file sizes, plus the
compression percent. For example:
5261 4065 22% /tmp/development-es.html
5258 4061 22% /tmp/development-fr.html
4651 3683 20% /tmp/development.html
=back
=head1 SEE ALSO
For the library, see L<HTML::Clean>
=head1 AUTHOR
C<htmlclean> is written by Paul Lindner, <paul.lindner@itu.int>
=head1 COPYRIGHT
Copyright (c) 1998 by ITU under the same terms as Perl.
=cut
# Show the usage message (and exit) when the script is run without any
# arguments, or when the very first argument is the help flag '-?'.
usage() if !@ARGV || $ARGV[0] eq '-?';
use
HTML::Clean;
use
Getopt::Long;
# Parsed command-line switches, keyed by option name.
my (%opts);

$Getopt::Long::getopt_compat = 1;
# avoid parsing +'s as options (doesn't work!)
Getopt::Long::config(qw(no_ignore_case no_getopt_compat));

# Accepted switches: -v, -V, -t <string> and the digit flags -1 .. -9.
# GetOptions returns false when the command line contains an invalid
# option; stop with the usage message in that case instead of going on
# to rewrite the user's files.
# NOTE(review): the value of -t is not read anywhere in the visible code.
GetOptions(\%opts, qw(v V t=s 1 2 3 4 5 6 7 8 9)) or usage();

print_version() if ($opts{'V'});
usage() if ($#ARGV == -1);    # we MUST have at least one file

# File-scoped flag; main() consults it to decide whether to print sizes.
my ($verbose) = $opts{'v'};

# The level defaults to 9.  When several digit flags are given, the
# highest-numbered one wins because the loop runs in ascending order.
my $level = 9;
foreach my $i (1 .. 9) {
    $level = $i if ($opts{$i});
}

main($level, \@ARGV);
exit 0;
# Clean every named file in place through an HTML::Clean object.
#
# Parameters:
#   $level - value passed to the object's level() method
#   $files - reference to an array of file names to process
#
# For each file: initialize() the object with the file name, call
# compat() and strip(), rename the original to "<file>.bak", and write
# the data referenced by data() back under the original name.  When the
# file-scoped $verbose flag is true, a line with the original length,
# the new length, the percentage saved and the file name is printed.
#
# Calls print_error() (which exits) when the object cannot be created or
# initialize() returns 0; dies when the rename, open, write or close
# fails.
sub main {
    my ($level, $files) = @_;

    # Just an empty holder..
    my $h = HTML::Clean->new();
    print_error('initializing...') if (!$h);

    $h->level($level);

    foreach my $f (@$files) {
        my $result = $h->initialize($f);
        print_error($f) if ($result == 0);

        # $d is a reference to the data; its length is measured before
        # and after the cleaning calls to compute the savings.
        my $d       = $h->data();
        my $origlen = length($$d);

        # add options to control these...
        $h->compat();
        $h->strip();

        my $newlen = length($$d);

        # Guard against division by zero for empty input.
        my $pct = 0;
        if ($origlen > 0) {
            $pct = (100 * ($origlen - $newlen)) / $origlen;
        }

        printf "%6d %6d %2d%% %s\n", $origlen, $newlen, $pct, $f
            if ($verbose);

        # Okay, now move the files around..
        rename($f, "$f.bak") or die "Cannot rename '$f': $!\n";

        # Three-argument open with a lexical handle so unusual file names
        # cannot be misread as an open mode.
        open(my $out, '>', $f) or die "Cannot overwrite '$f': $!\n";
        print {$out} $$d or die "Cannot write '$f': $!\n";

        # Buffered write errors only surface at close time.
        close($out) or die "Cannot write '$f': $!\n";
    }
}
# Report a fatal error on STDERR and terminate with exit status 1.
#
# Parameters:
#   $msg - text describing what failed (callers pass a file name or a
#          short phase description)
#
# The message is prefixed with the script name and followed by the
# current value of $! in parentheses.  Never returns.
sub print_error {
    my ($msg) = @_;

    print STDERR <<END;
$0: $msg ($!)
END
    exit(1);
}