#!/usr/local/bin/perl
# Module version, derived from the RCS "Revision" keyword: the first number is
# printed as-is and every following number is zero-padded to three digits, so
# revision 1.5 becomes "1.005".
our $VERSION = do {
    my ($major, @minor) = q$Revision: 1.5 $ =~ /\d+/g;
    sprintf '%d.' . ('%03d' x scalar @minor), $major, @minor;
};

# File-wide debug switch.  native_setup_search turns it on when the
# 'search_debug' option is true; the retrieval code consults it before
# emitting diagnostics with warn.
my $debug = 0;
# native_setup_search($self, $native_query, $native_opt)
#
# Builds the URL of the first results page and stores it on the search object.
#
# Parameters:
#   $self         - search object; $self->{'search_how'} selects how the query
#                   is rewritten before it is placed in the URL
#   $native_query - query string to substitute for %s in the URL template
#   $native_opt   - optional hash ref; recognised keys are 'search_url' and
#                   'search_args' (together they replace the default URL) and
#                   'search_debug' (turns the file-wide $debug flag on)
#
# Side effects: calls $self->user_agent(1), resets $self->{_next_to_retrieve}
# to 0 and sets both $self->{_base_url} and $self->{_next_url} to the final URL.
#
# Returns the final URL, or nothing (with both URL slots set to undef) when no
# URL could be determined.
sub native_setup_search {
    my ($self, $native_query, $native_opt) = @_;

    # NOTE(review): the string literal that initialised this template is
    # missing from this copy of the file -- the assignment had no right-hand
    # side and the file did not compile.  Restore the engine's default URL
    # here; presumably it carries the %s and %n placeholders filled in below
    # -- TODO confirm against the upstream source.
    my $default_native_url = '';

    # Page size substituted for %n and sent as MaxRecordsPerPage.
    my $records_per_page = 40;

    my $native_url;
    if (defined $native_opt) {
        # A caller-supplied base URL plus argument string overrides the
        # default.  The guard tests the same hash the URL is read from; the
        # previous code tested $self->{'search_url'} and then concatenated
        # $native_opt->{'search_url'}.
        if ($native_opt->{'search_url'} && $native_opt->{'search_args'}) {
            $native_url = $native_opt->{'search_url'} . '?' . $native_opt->{'search_args'};
        }
        $debug = 1 if $native_opt->{'search_debug'};
    }
    $native_url = $default_native_url if !$native_url;

    # Without a URL there is nothing to request.  Leaving _next_url undefined
    # makes native_retrieve_some return undef instead of requesting garbage.
    if (!$native_url) {
        warn "native_setup_search: no search URL available\n";
        $self->{_next_to_retrieve} = 0;
        $self->{_base_url}         = undef;
        $self->{_next_url}         = undef;
        return;
    }

    my $how = $self->{'search_how'};
    if (defined $how) {
        if ($how eq 'match%5Fany') {
            # Drop the "and" operators but keep the words separated
            # (removing the match outright glued neighbouring words together).
            $native_query =~ s/ and / /g;
        }
        elsif ($how eq 'match%5Fall') {
            # Normalise: strip existing "and"s, then join every word with one.
            $native_query =~ s/\s+and\s+/ /g;
            $native_query =~ s/\s+/ and /g;
        }
        elsif ($how eq 'match%5Fphrase') {
            # Remove any quotes, turn '+' into spaces and quote the whole phrase.
            $native_query =~ s/['"]+//g;
            $native_query =~ s/\+/ /g;
            $native_query = "'$native_query'";
        }
        # 'match_boolean' (and any other value) passes the query through as-is.
    }

    # Fill in both placeholders in a single pass so that a "%n" occurring
    # inside the query text itself is not expanded afterwards.
    my %fill_for = (s => $native_query, n => $records_per_page);
    $native_url =~ s/%([sn])/$fill_for{$1}/g;

    $native_url .= "&MaxRecordsPerPage=$records_per_page"
        if $native_url !~ /MaxRecordsPerPage=/;

    $self->user_agent(1);
    $self->{_next_to_retrieve} = 0;
    $self->{_base_url}         = $native_url;
    $self->{_next_url}         = $native_url;

    return $native_url;
}
# native_retrieve_some($self)
#
# Requests the page at $self->{_next_url}, scrapes the result entries out of
# the returned HTML line by line and pushes one WWW::SearchResult per entry
# onto $self->{cache}.
#
# Returns:
#   undef - no URL is pending, or the HTTP request was not successful
#   0     - the response body was empty (or otherwise false)
#   N     - number of entries parsed from the page (may be 0)
#
# Side effects: stores the response object in $self->{response}, empties and
# refills $self->{cache}, reports a count through approximate_result_count()
# and, once a page has been parsed, sets $self->{_next_url} to undef so that a
# following call returns undef until a new URL is set.
sub native_retrieve_some {
    my ($self) = @_;

    # Explicit undef (rather than a bare return) is kept so that callers in
    # list context still receive exactly one element, as before.
    return undef if !defined $self->{_next_url};

    my $method = $self->{search_method};
    $method = 'GET' if !defined $method;

    warn "$method ", $self->{_next_url}, "\n" if $debug;
    my $response = $self->http_request($method, $self->{_next_url});
    $self->{response} = $response;
    if (!$response->is_success) {
        warn " --- HTTP request failed: ", $response->as_string, "\n" if $debug;
        return undef;
    }

    my $results = $response->content();

    # Empty the cache in place (creating the array if it does not exist yet).
    @{ $self->{cache} } = ();
    return 0 if !$results;

    # Parser states; each one waits for the next piece of a result entry.
    my ($SEEK_ENTRY, $SEEK_TITLE, $SEEK_ABSTRACT, $IN_ABSTRACT, $SEEK_URL, $SEEK_SIZE) = (0 .. 5);

    my ($title, $link, $size, $total_hits, $abstract) = ('', '', 0, 0, undef);
    my $state      = $SEEK_ENTRY;
    my $hits_found = 0;

    # A lexical loop variable is used so the caller's $_ is left alone.
    my @lines = $self->split_lines($results);
    for my $line (@lines) {
        if ($state == $SEEK_ENTRY) {
            ($title, $link, $size, $abstract) = ('', '', 0, undef);
            if ($line =~ m/matching the query/) {
                # Keeps every digit on the line; several numbers on the same
                # line therefore run together into one value.
                $line =~ s/[^0-9]//g;
                $total_hits = $line;
            }
            $state = $SEEK_TITLE if $line =~ m/<dt>/;
        }
        elsif ($state == $SEEK_TITLE) {
            if ($line =~ m/<b><a href=/) {
                $line =~ s/^.*">//;
                $line =~ s/<\/a>.*$//;
                $title = $line;
                $state = $SEEK_ABSTRACT;
            }
        }
        elsif ($state == $SEEK_ABSTRACT) {
            if ($line =~ m/Abstract: /) {
                $abstract = $line;
                $abstract =~ s/^.*Abstract: //;
                $state = $IN_ABSTRACT;
            }
        }
        elsif ($state == $IN_ABSTRACT) {
            if ($line =~ m/cite/) {
                $state = $SEEK_URL;
            }
            else {
                # Continuation line of the abstract: collapse its first run
                # of whitespace and append it.
                $line =~ s/\s+/ /;
                $abstract .= $line;
            }
        }
        elsif ($state == $SEEK_URL) {
            if ($line =~ m/a href=/) {
                $line =~ s/^.*">//;
                $line =~ s/<\/a.*$//;
                $link  = $line;
                $state = $SEEK_SIZE;
            }
        }
        elsif ($state == $SEEK_SIZE) {
            if ($line =~ m/size.*bytes/) {
                $line =~ s/^.*- size //;
                $line =~ s/ - .*$//;
                $line =~ s/,//g;    # drop every thousands separator, not just the first
                $size  = $line;
                $state = $SEEK_ENTRY;

                # Entry complete: build the hit.  The link is resolved
                # against the URL of the page it was found on.
                my $hit      = WWW::SearchResult->new;
                my $link_obj = URI::URL->new($link, $self->{_next_url});
                $hit->add_url($link_obj->abs->as_string);
                $hit->title($title);
                $hit->size($size);

                # Strip simple HTML tags from the abstract.
                $abstract =~ s,<[A-Za-z/]*?>,,g;
                $hit->description($abstract);

                # Score decreases by 20 for each successive hit on the page.
                $hits_found++;
                $hit->score(800 - (20 * $hits_found));
                $hit->normalized_score(800 - (20 * $hits_found));

                push @{ $self->{cache} }, $hit;
            }
        }
    }

    # Prefer the total announced by the page; otherwise report what was parsed.
    $self->approximate_result_count($total_hits ? $total_hits : $hits_found);

    $self->{_next_url} = undef;
    return $hits_found;
}
# A file loaded with require/use must finish by evaluating to a true value.
1;