describe
"KeywordsSpider::Core"
=>
sub
{
my
$string
=
q{}
;
open
my
(
$fh
),
'>'
, \
$string
;
my
@keywords
=
qw/child abuse death pussy/
;
my
%allowed_keywords
= (
child
=> 1,
pussy
=> 1,
);
my
$debug
= 1;
my
$web_depth
= 5;
my
%spider_params
= (
output_file
=>
$fh
,
keywords
=> \
@keywords
,
allowed_keywords
=> \
%allowed_keywords
,
debug_enabled
=>
$debug
,
web_depth
=>
$web_depth
,
);
my
$url_params
= {
'want_spider'
=> 1,
'fetched'
=> 0,
'depth'
=> 0,
};
it
"loads ok"
=>
sub
{
pass;
};
it
"error response"
=>
sub
{
my
$spider
= KeywordsSpider::Core->new(
%spider_params
,
links
=> {
$url
=>
$url_params
,},
);
my
$response
= Test::MockObject->new();
$response
->set_false(
'is_success'
)
->set_always(
'status_line'
, 400);
LWP::UserAgent->expects(
'request'
)->returns(
$response
)->at_least(1);
$spider
->spider_links();
like(
$spider
->output,
qr/ERROR:400/
);
};
it
"adds a link"
=>
sub
{
my
$html
=
<<EF;
<html>
<head>
</head>
<body>
<a href="trala.html"></a>
</body>
</html>
EF
my
$root
= HTML::TreeBuilder->new_from_content(
$html
);
my
$spider
= KeywordsSpider::Core->new(
%spider_params
,
links
=> {
$url
=>
$url_params
,},
root
=>
$root
,
);
my
$depth
= 1;
$spider
->add_links_from_root(
$depth
,
$url
);
};
it
"does not add link from another domain"
=>
sub
{
my
$html
=
<<EF;
<html>
<head>
</head>
<body>
</body>
</html>
EF
my
$root
= HTML::TreeBuilder->new_from_content(
$html
);
my
$spider
= KeywordsSpider::Core->new(
%spider_params
,
links
=> {
$url
=>
$url_params
,},
root
=>
$root
,
origin_domain
=>
'domain.sk'
,
);
my
$depth
= 1;
$spider
->add_links_from_root(
$depth
,
$url
);
};
describe
"check_website"
=>
sub
{
my
$OK
= 0;
my
$ALERT
= 1;
it
"OKs"
=>
sub
{
my
$html
=
<<HTMLEND;
<html>
<head>
</head>
<body>
tralala trololo
</body>
</html>
HTMLEND
my
$root
= HTML::TreeBuilder->new_from_content(
$html
);
my
$spider
= KeywordsSpider::Core->new(
%spider_params
,
links
=> {
$url
=>
$url_params
,},
root
=>
$root
,
);
is(
$spider
->check_website(
$url
),
$OK
);
};
it
"ALERTs"
=>
sub
{
my
$html
=
<<HTMLEND;
<html>
<head>
<meta name="keywords" content="pussy">
</head>
<body>
tralala death trololo
</body>
</html>
HTMLEND
my
$root
= HTML::TreeBuilder->new_from_content(
$html
);
my
$spider
= KeywordsSpider::Core->new(
%spider_params
,
links
=> {
$url
=>
$url_params
,},
root
=>
$root
,
);
is(
$spider
->check_website(
$url
),
$ALERT
);
like(
$spider
->output,
qr/ALERT possible bad content $url death pussy/
);
like(
$spider
->output,
qr/found keywords: 2/
);
};
};
it
"uses defaults if not specified"
=>
sub
{
my
$spider
= KeywordsSpider::Core->new(
output_file
=>
$fh
,
links
=> {
$url
=>
$url_params
,},
keywords
=> \
@keywords
,
);
is(
$spider
->debug_enabled, 0);
is(
$spider
->web_depth, 3);
};
};
runtests;