#!/usr/bin/perl
use
5.010001;
use
Fatal
qw(open close)
;
my
$verbose
= 0;
Carp::croak(
"usage: $PROGRAM_NAME [--verbose=[0|1|2] [distribution]"
)
if
not Getopt::Long::GetOptions(
'verbose=i'
=> \
$verbose
);
my
$most_recent_distribution
=
pop
@ARGV
;
if
( not
$most_recent_distribution
) {
my
@distributions
=
grep
{/\A Marpa [-] R2 [-] \d /xms}
sort
map
{
$_
->[2] }
CPAN::Shell->expand(
'Author'
,
'JKEGL'
)->ls(
'Marpa-R2-*'
, 2 );
$most_recent_distribution
=
pop
@distributions
;
$most_recent_distribution
=~ s/\.tar\.gz$//xms;
}
my
$marpa_doc_base
=
$cpan_base
.
'/~jkegl/'
.
"$most_recent_distribution/"
;
if
(
$verbose
) {
print
"Starting at $marpa_doc_base\n"
or Carp::croak(
"Cannot print: $ERRNO"
);
}
$OUTPUT_AUTOFLUSH
= 1;
my
@doc_urls
= ();
{
my
$p
= HTML::LinkExtor->new();
my
$ua
= LWP::UserAgent->new;
my
$response
=
$ua
->request( HTTP::Request->new(
GET
=>
$marpa_doc_base
),
sub
{
$p
->parse(
$_
[0] ) } );
my
$page_response_status_line
=
$response
->status_line;
if
(
$response
->code != OK ) {
Carp::croak(
'PAGE: '
,
$page_response_status_line
,
q{ }
,
$marpa_doc_base
);
}
my
@links
=
map
{
$_
->[2] }
grep
{
$_
->[0] eq
'a'
and
$_
->[1] eq
'href'
and
$_
->[2] !~ /^[
$p
->links();
@doc_urls
=
grep
{/^pod\//xms}
@links
;
}
my
%url_seen
= ();
my
$at_col_0
= 1;
PAGE:
for
my
$url
(
@doc_urls
) {
$url
=
$marpa_doc_base
.
$url
;
print
"Examining document $url"
or Carp::croak(
"Cannot print: $ERRNO"
);
$at_col_0
= 0;
my
$p
= HTML::LinkExtor->new();
my
$ua
= LWP::UserAgent->new;
my
$response
=
$ua
->request( HTTP::Request->new(
GET
=>
$url
),
sub
{
$p
->parse(
$_
[0] ) } );
my
$page_response_status_line
=
$response
->status_line;
if
(
$response
->code != OK ) {
say
'PAGE: '
,
$page_response_status_line
,
q{ }
,
$url
or Carp::croak(
"Cannot print: $ERRNO"
);
next
PAGE;
}
my
@links
=
map
{
$_
->[2] }
grep
{
$_
->[0] eq
'a'
and
$_
->[1] eq
'href'
}
$p
->links();
LINK:
for
my
$link
(
@links
) {
given
(
$link
) {
when
(/\A\//xms) {
}
when
(/\A[
$link
=
$url
.
$link
;
}
}
if
(
$url_seen
{
$link
}++ ) {
if
(
$verbose
>= 2 ) {
say
{
*STDERR
}
"Already tried $link"
or Carp::croak(
"Cannot print: $ERRNO"
);
$at_col_0
= 1;
}
next
LINK;
}
if
(
$verbose
> 1 ) {
$at_col_0
or
print
"\n"
or Carp::croak(
"Cannot print: $ERRNO"
);
say
{
*STDERR
}
"Trying $link"
or Carp::croak(
"Cannot print: $ERRNO"
);
$at_col_0
= 1;
}
my
$link_response
=
$ua
->request( HTTP::Request->new(
GET
=>
$link
) );
if
(
$link_response
->code != OK ) {
$at_col_0
or
print
"\n"
or Carp::croak(
"Cannot print: $ERRNO"
);
say
'FAIL: '
,
$link_response
->status_line,
q{ }
,
$link
or Carp::croak(
"Cannot print: $ERRNO"
);
$at_col_0
= 1;
next
LINK;
}
if
( not
$verbose
) {
print
{
*STDERR
}
q{.}
or Carp::croak(
"Cannot print: $ERRNO"
);
$at_col_0
= 0;
}
if
(
$verbose
) {
$at_col_0
or
print
"\n"
or Carp::croak(
"Cannot print: $ERRNO"
);
my
$uri
=
$link_response
->base();
say
{
*STDERR
}
"FOUND $link"
or Carp::croak(
"Cannot print: $ERRNO"
);
say
{
*STDERR
}
" uri: $uri"
or Carp::croak(
"Cannot print: $ERRNO"
);
if
(
$verbose
>= 3 ) {
for
my
$redirect
(
$link_response
->redirects() ) {
my
$redirect_uri
=
$redirect
->base();
say
{
*STDERR
}
" redirect: $redirect_uri"
or Carp::croak(
"Cannot print: $ERRNO"
);
}
}
$at_col_0
= 1;
}
}
$at_col_0
or
print
"\n"
or Carp::croak(
"Cannot print: $ERRNO"
);
$at_col_0
= 1;
if
(
$verbose
) {
say
" PAGE: $page_response_status_line: $url"
or Carp::croak(
"Cannot print: $ERRNO"
);
$at_col_0
= 1;
}
}