The Perl Toolchain Summit 2025 Needs You: You can help 🙏 Learn more
|
use Importer 'NewsExtractor::TextUtil' => 'reformat_dateline' ; sub _build_content_text {
# Prunes nodes from the page DOM and then delegates to the parent class's
# _build_content_text() to produce the content text.
my ( $self ) = @_ ;
# NOTE(review): the next statement looks truncated in this copy of the file --
# the "})" that closes the anonymous sub has no visible opening call. Presumably
# the sub was the predicate of a collection filter applied to some
# $self->dom selection; restore the missing leading expression from the
# upstream source before relying on this code.
# The predicate is true for a node that is the only child of its parent and
# whose text begins with the "★" character; 'remove' is then mapped over
# whatever the (missing) filter call returns.
sub {
( $_ ->parent->children->size == 1)
&& ( $_ ->text =~ m/^★/)
})-> map ( 'remove' );
# Call 'remove' on every node returned by following_nodes() for the element
# matched by "#contentb div.raw-style > span:nth-child(1)".
# NOTE(review): at() is chained without a check, so this presumably dies when
# the selector matches nothing -- confirm that is acceptable for this site.
$self ->dom->at( "#contentb div.raw-style > span:nth-child(1)" )->following_nodes()-> map ( 'remove' );
# Hand the pruned DOM to the inherited implementation.
return $self ->SUPER::_build_content_text();
}
# journalist
#
# Extracts the journalist/editor name from the article's info line.
#
# Looks up the element matching
# '.fncnews-content > .info > span.small-gray-text' and matches its text
# against the "東森新聞" marker (optionally followed by "責任編輯"); whatever
# follows the marker and the whitespace after it, up to the end of the
# text, is the name.
#
# Returns:
#   - nothing (bare return) when the info element is not found;
#   - undef when the element exists but its text does not match;
#   - the captured name otherwise.
sub journalist {
    my ($self) = @_;

    my $byline_el = $self->dom->at('.fncnews-content > .info > span.small-gray-text');
    return unless $byline_el;

    my $byline = $byline_el->all_text;

    # Stays undef when the pattern does not match.
    my $reporter;
    if ( $byline =~ m/(?:東森新聞(?:\s*責任編輯)?)\s+(.+)$/ ) {
        $reporter = $1;
    }

    return $reporter;
}
# dateline
#
# Produces the article's dateline from the info line.
#
# Looks up the element matching
# ".fncnews-content > .info > span.small-gray-text" and passes its text,
# together with the '+08:00' offset string, to reformat_dateline()
# (imported from NewsExtractor::TextUtil).
#
# Returns nothing (bare return) when the info element is not found;
# otherwise returns whatever reformat_dateline() returns.
sub dateline {
    my ($self) = @_;

    my $selector = ".fncnews-content > .info > span.small-gray-text";
    my $info_el  = $self->dom->at($selector);
    return unless $info_el;

    return reformat_dateline( $info_el->all_text(), '+08:00' );
}
1;
|