|
# Import `u` from NewsExtractor::TextUtil; every accessor below passes the
# value it extracts through this helper before returning it.
use Importer 'NewsExtractor::TextUtil' => qw(u);

# tx: read-only attribute that must be supplied at construction time and must
# be a Mojo::Transaction::HTTP instance.
# NOTE(review): presumably the transaction for the page being parsed -- it is
# not referenced directly by the code visible here; confirm against `dom`.
has tx => (
    is       => 'ro',
    isa      => InstanceOf['Mojo::Transaction::HTTP'],
    required => 1,
);
# schema_ld: hash reference holding the decoded JSON-LD data of the page.
# Optional at construction; when not supplied it is computed on first access
# by _build_schema_ld (builder => 1 selects the conventionally named builder).
has schema_ld => (
    is       => 'lazy',
    isa      => HashRef,
    builder  => 1,
    required => 0,
);
# Builder for the lazy `schema_ld` attribute.
#
# Finds the first <script type="application/ld+json"> element in the document
# and decodes its text as JSON.  It always returns a hash reference so the
# attribute's HashRef constraint holds:
#
#   no such element              => {}
#   text that fails to decode    => {}
#   top-level JSON object        => that object
#   top-level JSON array         => its first object element, or {} if none
#   any other top-level value    => {}
sub _build_schema_ld {
    my ($self) = @_;

    my $el = $self->dom->at('script[type="application/ld+json"]')
        or return {};

    # NOTE(review): from_json is imported outside this chunk; both Mojo::JSON
    # and JSON die on malformed input, so decoding is guarded.  A page with
    # broken JSON-LD is treated like a page with none.
    my $decoded = do {
        local $@;
        eval { from_json($el->text) };
    };

    return $decoded if ref($decoded) eq 'HASH';

    # JSON-LD may legally be a top-level array of nodes; use the first object.
    if (ref($decoded) eq 'ARRAY') {
        for my $node (@$decoded) {
            return $node if ref($node) eq 'HASH';
        }
    }

    return {};
}
# Returns the author name from the JSON-LD `author` property, passed through
# u().
#
# Accepted shapes of `author`:
#   { name => '...' }            => that name
#   'Some Name'                  => the string itself
#   [ {name => ...}, 'X', ... ]  => all non-empty names joined with ', '
#   absent / anything else       => u(undef), as before
#
# The property is read into a local first so that a missing `author` key is
# not autovivified into the cached schema_ld hash, and so that a string or
# array value no longer dies when dereferenced as a hash.
sub journalist {
    my ($self) = @_;

    my $author = $self->schema_ld->{author};
    my $name;

    if (ref($author) eq 'HASH') {
        $name = $author->{name};
    }
    elsif (ref($author) eq 'ARRAY') {
        my @names =
            grep { defined($_) && !ref($_) && length($_) }
            map  { ref($_) eq 'HASH' ? $_->{name} : $_ } @$author;
        $name = join(', ', @names) if @names;
    }
    elsif (!ref($author)) {
        # Plain string, or undef when the property is absent.
        $name = $author;
    }

    return u($name);
}
# Returns the JSON-LD `headline` property, passed through u().
sub headline {
    my $self = shift;

    my $ld = $self->schema_ld;
    return u($ld->{headline});
}
# Returns the JSON-LD `datePublished` property, passed through u().
sub dateline {
    my $self = shift;

    my $published = $self->schema_ld->{datePublished};
    return u($published);
}
# Returns the article text, passed through u(): the JSON-LD `articleBody`
# when defined, otherwise `description`, otherwise the empty string.
sub content_text {
    my $self = shift;

    my $ld = $self->schema_ld;

    # Properties are tried in order of preference; the first defined one wins.
    for my $key (qw(articleBody description)) {
        my $candidate = $ld->{$key};
        return u($candidate) if defined $candidate;
    }

    return u('');
}
1;
|