WWW::Scraper::Lite
$LastChangedRevision: 15 $
my $domain  = 'http://devsite.local/';
my $scraper = WWW::Scraper::Lite->new();
$scraper->crawl($domain,
    {
      '//a' => sub { # handler for all 'a' tags
        my ($scraper, $nodes) = @_;
        $scraper->enqueue(grep { $_ =~ m{^$domain} }                # only this domain
                          map  { $scraper->url_remove_anchor($_) }  # only index pages without #anchor
                          map  { $scraper->url_make_absolute($_) }  # indexer needs absolute URLs
                          map  { $_->{href} }                       # pull href out of the 'a' DOM node
                          @{$nodes});
      },
      '/*' => sub { # handler for all content
        my ($scraper, $nodes) = @_;
        print $scraper->{current}->{response}->content; # do something useful with HTTP response
      },
    }
);
my $oScraper = WWW::Scraper::Lite->new();
my $oUA = $oScraper->ua();
$oScraper->crawl($sStartURL, $hrCallbacks);
$oScraper->enqueue(@aURLs);
my $sURL = $oScraper->dequeue();
my $hrCurrentData = $oScraper->current;
my $sURLout = $oScraper->url_remove_anchor($sURLin);
$Author: Roger Pettett,,,$
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>.
To install WWW::Scraper::Lite, copy and paste the appropriate command into your terminal.
cpanm
cpanm WWW::Scraper::Lite
CPAN shell
perl -MCPAN -e shell
install WWW::Scraper::Lite
For more information on module installation, please visit the detailed CPAN module installation guide.