FEAR::API - There's no fear with this elegant site scraper
FEAR::API is a tool that helps reduce the time you spend creating site-scraping scripts and helps you do it in a much more elegant way. FEAR::API combines many strong and powerful features from various CPAN modules, such as LWP::UserAgent, WWW::Mechanize, Template::Extract, Encode, HTML::Parser, etc., and digests them into a deeper Zen.
More documentation will come sooooooner or later.
use FEAR::API -base;
fetch("google.com") > my $content; my $content = fetch("google.com")->document->as_string;
getprint("google.com"); print fetch("google.com")->document->as_string; fetch("google.com"); print $$_; fetch("google.com") | _print;
getstore("google.com"); url("google.com")->() | _save_as("google.html"); fetch("google.com") | io('google.html');
url("google.com")->() >> _self; &$_ while $_;
(url("google.com")->() >> _self) | _save_as_tree("./root"); $_->() | _save_as_tree("./root") while $_;
url("google.com")->() >> _self; &$_ >> _self while $_;
(url("google.com")->() >> _self) | _save_as_tree("./root"); while($_){ (&$_ | _save_as_tree("./root")) >> _self; }
url("google.com")->()->follow_link(n => 2);
print Dumper fetch("google.com")->links;
url("google.com")->(); submit_form( form_number => 1, fields => { q => "Kill Bush" } );
url("[% FOREACH i = ['a'..'z'] %] http://some.site/[% i %] [% END %]"); &$_ while $_;
url("google.com")->() >> _self; pfetch(sub{ local $_ = shift; print join q/ /, title, current_url, document->size, $/; });
url("google.com")->() >> [ qr(^http:) => _self, qr(google) => \my @l, qr(google) => sub { print ">>>".$_->[0],$/ } ]; $_->() while $_; print Dumper \@l;
fetch("http://google.com") ->report_links( qr(^http:) => _self, qr(google) => \my @l, qr(google) => sub { print ">>>".$_->[0],$/ } ); fetch while has_more_urls; print Dumper \@l;
url("google.com")->() >> { qr(^http:) => _self, qr(google) => \my @l, qr(google) => sub { print ">>>".$_->[0],$/ } }; $_->() while $_; print Dumper \@l;
fetch("http://google.com") ->fallthrough_report(1) ->report_links( qr(^http:) => _self, qr(google) => \my @l, qr(google) => sub { print ">>>".$_->[0],$/ } ); fetch while has_more_urls; print Dumper \@l;
url("http://search.cpan.org/recent")->(); submit_form( form_name => "f", fields => { query => "perl" }); template("<!--item-->[% p %]<!--end item-->"); extract; print Dumper extresult;
url("http://search.cpan.org/recent")->(); submit_form( form_name => "f", fields => { query => "perl" }); preproc(q(s/\A.+<!--results-->(.+)<!--end results-->.+\Z/$1/s)); print document->as_string; # print content to STDOUT template("<!--item-->[% p %]<!--end item-->"); extract; print Dumper extresult;
url("http://search.cpan.org/recent")->(); submit_form( form_name => "f", fields => { query => "perl" }); preproc(q(s/\A.+<!--results-->(.+)<!--end results-->.+\Z/$1/s)); print $$_; # print content to STDOUT template("<!--item-->[% rec %]<!--end item-->"); extract; postproc(q($_->{rec} =~ s/<.+?>//g)); # Strip HTML tags print Dumper extresult;
fetch("http://search.cpan.org/recent"); submit_form( form_name => "f", fields => { query => "perl" }) | _doc_filter(q(s/\A.+<!--results-->(.+)<!--end results-->.+\Z/$1/s)) | _template("<!--item-->[% rec %]<!--end item-->") | _result_filter(q($_->{rec} =~ s/<.+?>//g)); print Dumper \@$_;
fetch("http://search.cpan.org/recent"); submit_form( form_name => "f", fields => { query => "perl" }) | _doc_filter(q(s/\A.+<!--results-->(.+)<!--end results-->.+\Z/$1/s)) | "<!--item-->[% rec %]<!--end item-->" | _result_filter(q($_->{rec} =~ s/<.+?>//g)); invoke_handler('Data::Dumper');
url("google.com")->() | _preproc(use => "html_to_null") | _preproc(use => "decode_entities") | _print;
fetch("http://search.cpan.org/recent"); submit_form( form_name => "f", fields => { query => "perl" }) | _doc_filter(q(s/\A.+<!--results-->(.+)<!--end results-->.+\Z/$1/s)) | _template("<!--item-->[% rec %]<!--end item-->") | _result_filter(use => "html_to_null", qw(rec)) | _result_filter(use => "decode_entities", qw(rec)); print Dumper \@$_;
Copyright (C) 2006 by Yung-chung Lin (a.k.a. xern) <xern@cpan.org>
This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
To install FEAR::API, copy and paste the appropriate command into your terminal.
cpanm
cpanm FEAR::API
CPAN shell
perl -MCPAN -e shell install FEAR::API
For more information on module installation, please visit the detailed CPAN module installation guide.