use
5.34.0;
no
warnings
'experimental::try'
;
our
$VERSION
=
'3.3.2'
;
$XML::Atom::ForceUnicode
= 1;
has
'config'
=> (
is
=>
'ro'
,
isa
=>
'HashRef'
,
);
has
'ua'
=> (
is
=>
'ro'
,
isa
=>
'LWP::UserAgent'
,
lazy_build
=> 1
);
sub
_build_ua {
my
$self
=
shift
;
my
$ua
= LWP::UserAgent->new(
agent
=>
"Perlanet/$VERSION"
,
timeout
=> 20,
);
$ua
->show_progress(1)
if
-t STDOUT;
$ua
->env_proxy;
return
$ua
;
}
has
'cutoff_duration'
=> (
isa
=>
'Perlanet::DateTime::Duration'
,
is
=>
'ro'
,
lazy_build
=> 1,
coerce
=> 1,
);
sub
_build_cutoff_duration {
return
{
years
=> 1_000 };
}
has
'cutoff'
=> (
isa
=>
'Perlanet::DateTime'
,
is
=>
'ro'
,
lazy_build
=> 1,
coerce
=> 1,
);
sub
_build_cutoff {
return
DateTime->now -
shift
->cutoff_duration;
}
has
'spam_regex'
=> (
isa
=>
'Maybe[Str]'
,
is
=>
'ro'
,
lazy_build
=> 1,
);
sub
_build_spam_regex {
my
$self
=
shift
;
return
unless
$self
->config->{spam_filter};
my
$re
=
'('
.
join
(
'|'
, @{
$self
->config->{spam_filter}}) .
')'
;
return
$re
;
}
has
'entries'
=> (
isa
=>
'Int'
,
is
=>
'ro'
,
default
=> 10,
);
has
'entries_per_feed'
=> (
isa
=>
'Int'
,
is
=>
'ro'
,
default
=> 5,
);
has
'feeds'
=> (
isa
=>
'ArrayRef'
,
is
=>
'ro'
,
default
=>
sub
{ [] }
);
has
'author'
=> (
isa
=>
'HashRef'
,
is
=>
'ro'
,
);
has
$_
=> (
isa
=>
'Str'
,
is
=>
'ro'
,
)
for
qw( self_link title description url agent )
;
has
entry_sort_order
=> (
isa
=>
'Str'
,
is
=>
'ro'
,
default
=>
'modified'
,
);
sub
fetch_page {
my
$self
=
shift
;
my
(
$url
) =
@_
;
return
URI::Fetch->fetch(
$url
,
UserAgent
=>
$self
->ua,
ForceResponse
=> 1,
);
}
sub
fetch_feeds {
my
$self
=
shift
;
my
(
$feeds
) =
@_
;
my
@valid_feeds
;
for
my
$feed
(
@$feeds
) {
next
unless
$feed
->feed;
my
$response
=
$self
->fetch_page(
$feed
->feed);
if
(
$response
->is_error) {
warn
'Error retrieving '
.
$feed
->feed,
"\n"
;
warn
$response
->http_response->status_line,
"\n"
;
next
;
}
unless
(
length
$response
->content) {
warn
'No data returned from '
.
$feed
->feed,
"\n"
;
next
;
}
try
{
my
$data
=
$response
->content;
die
'No data from '
.
$feed
->feed .
"\n"
unless
$data
;
my
$xml_feed
= XML::Feed->parse(\
$data
);
unless
(
$xml_feed
) {
warn
"Can't make an object from "
.
$feed
->feed .
"\n"
;
my
$content_type
=
$response
->content_type;
warn
"Content type: $content_type\n"
if
$content_type
;
my
$extract
=
substr
$data
, 0, 100;
die
"[$extract]\n"
;
}
$feed
->_xml_feed(
$xml_feed
);
$feed
->title(
$xml_feed
->title)
unless
$feed
->title;
push
@valid_feeds
,
$feed
;
}
catch
(
$e
) {
warn
'Errors parsing '
.
$feed
->feed,
"\n"
;
warn
"$e\n"
if
defined
$e
;
}
}
return
\
@valid_feeds
;
}
sub
select_entries {
my
$self
=
shift
;
my
(
$feeds
) =
@_
;
my
$date_zero
= DateTime->from_epoch(
epoch
=> 0);
my
@feed_entries
;
for
my
$feed
(
@$feeds
) {
my
@entries
=
grep
{ !
$self
->is_spam_entry(
$_
) }
$feed
->_xml_feed->entries;
for
(
@entries
) {
unless
(
$_
->issued or
$_
->modified) {
$_
->issued(
$date_zero
);
$_
->modified(
$date_zero
);
}
if
(
$_
->issued && !
$_
->modified) {
$_
->modified(
$_
->issued);
}
}
@entries
=
$self
->sort_entries(\
@entries
)->@*;
@entries
=
$self
->cutoff_entries(\
@entries
)->@*;
my
$number_of_entries
=
defined
$feed
->max_entries ?
$feed
->max_entries
:
$self
->entries_per_feed;
if
(
$number_of_entries
and
@entries
>
$number_of_entries
) {
$#entries
=
$number_of_entries
- 1;
}
for
(
@entries
) {
push
@feed_entries
,
Perlanet::Entry->new(
_entry
=>
$_
,
feed
=>
$feed
);
}
}
return
\
@feed_entries
;
}
sub
is_spam_entry {
my
$self
=
shift
;
my
(
$entry
) =
@_
;
return
unless
$self
->spam_regex;
my
$re
=
$self
->spam_regex;
my
$title
=
$entry
->title;
my
$content
=
$entry
->content;
return
1
if
$title
=~ /
$re
/;
return
1
if
$content
=~ /
$re
/;
return
;
}
sub
sort_entries {
my
$self
=
shift
;
my
(
$entries
) =
@_
;
my
@entries
;
if
(
$self
->entry_sort_order eq
'modified'
) {
@entries
=
sort
{
(
$b
->modified ||
$b
->issued)
<=>
(
$a
->modified ||
$a
->issued)
}
@$entries
;
}
elsif
(
$self
->entry_sort_order eq
'issued'
) {
@entries
=
sort
{
(
$b
->issued ||
$b
->modified)
<=>
(
$a
->issued ||
$a
->modified)
}
@$entries
;
}
else
{
die
'Invalid entry sort order: '
.
$self
->entry_sort_order;
}
return
\
@entries
;
}
sub
cutoff_entries {
my
$self
=
shift
;
my
(
$entries
) =
@_
;
my
@entries
=
grep
{
(
$_
->issued ||
$_
->modified) >
$self
->cutoff
}
@$entries
;
return
\
@entries
;
}
sub
build_feed {
my
$self
=
shift
;
my
(
$entries
) =
@_
;
my
%feed_data
= (
modified
=> DateTime->now,
feed
=>
$self
->config->{url},
);
for
(
qw[title description]
) {
$feed_data
{
$_
} =
$self
->
$_
if
defined
$self
->
$_
;
}
if
(
defined
$self
->author) {
$feed_data
{author} =
$self
->author->{name}
if
defined
$self
->author->{name};
$feed_data
{email} =
$self
->author->{email}
if
defined
$self
->author->{email};
}
if
(
defined
$self
->url) {
$feed_data
{self_link} =
$self
->url;
$feed_data
{id} =
$self
->url
}
$feed_data
{entries} =
$entries
;
my
$f
= Perlanet::Feed->new(
%feed_data
,
);
return
$f
;
}
sub
clean_html {
my
$self
=
shift
;
my
(
$entry
) =
@_
;
return
$entry
;
}
sub
clean_entries {
my
$self
=
shift
;
my
(
$entries
) =
@_
;
my
@clean_entries
;
foreach
(
@$entries
) {
if
(
my
$body
=
$_
->content->body) {
my
$cleaned
=
$self
->clean_html(
$body
);
$_
->content->body(
$cleaned
);
}
if
(
my
$summary
=
$_
->summary->body) {
my
$cleaned
=
$self
->clean_html(
$summary
);
$_
->summary->body(
$cleaned
);
}
push
@clean_entries
,
$_
;
}
return
\
@clean_entries
;
}
sub
render {
my
$self
=
shift
;
my
(
$feed
) =
@_
;
}
sub
run {
my
$self
=
shift
;
my
$feeds
=
$self
->fetch_feeds(
$self
->feeds);
my
$selected
=
$self
->select_entries(
$feeds
);
my
$sorted
=
$self
->sort_entries(
$selected
);
my
$cleaned
=
$self
->clean_entries(
$sorted
);
my
$feed
=
$self
->build_feed(
$cleaned
);
$self
->render(
$feed
);
}
no
Moose;
__PACKAGE__->meta->make_immutable;
1;