PRWIRE/PRWire.pm - metacpan.org


            
              1
2
3
4
5
6
7
—
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
              # PRWire.pm
# by Jim Smyser
# Copyright (C) 2000 by Jim Smyser 
# $Id: PRWire.pm,v 1.00 2000/04/07 02:33:19 jims Exp $
package WWW::Search::PRWire;
=head1 NAME
WWW::Search::PRWire - class for viewing latest Press Releases 
=head1 SYNOPSIS
  use WWW::Search;
 my $search = new WWW::Search('PRWire');
 $search->native_query(WWW::Search::escape_query('NULL'));
  while (my $result = $search->next_result())
    { 
    print $result->url, "\n"; 
    }
=head1 DESCRIPTION
Class for WWW::Search for fetching and parsing the latest PRWire news
headlines from F<http://www.prnewswire.com>. This code should
serve as an example of using WWW::Search to parse useful data
from pages other than its usual searching methods. Yes, WWW::Search
is useful for retrieving data when there are "next" pages to get and
yet no search interface to interact with.
See USAGE.
This class exports no public interface; all interaction should
be done through WWW::Search objects.
=head1 USAGE
PRWire.pm does not deal with options $native_query (Query) or any
others except $maximum_to_retrieve. In a sense, this is not a
"search" backend. It simply parses all the latest headlines and
retrieves more as defined by $maximum_to_retrieve.
If you use this backend with WebSearch or AutoSearch you will need to send a
bogus query to keep them from complaining that there is no query. Search for
NULL or something. On a web page you could just have a button with a caption
"View Latest Press Releases" and, optionally, an option for how
many to return.
$result->title returns just the date and time of the article, so you
will also want to print $result->description after $result->title
so users will have descriptive text identifying the article. 
=head1 AUTHOR
C<WWW::Search::PRWire> is written and maintained by Jim Smyser
<jsmyser@bigfoot.com>.
=head1 COPYRIGHT
(c) PR Newswire Redistribution, retransmission, republication or
commercial exploitation of the contents of PR Newswire are expressly
prohibited without the written consent of PR Newswire.
WWW::Search Copyright (c) 1996-1998 University of Southern California.
All rights reserved.                                            
                                                                
THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
=cut
#'
#####################################################################
# Package plumbing. Exporter is loaded and listed in @ISA, but both export
# lists are empty, so nothing is exported from this package.
require Exporter;
@EXPORT = qw();
@EXPORT_OK = qw();
# Inherit from WWW::Search first, then Exporter.
@ISA = qw(WWW::Search Exporter);
$VERSION = '1.0';
# Load Carp without importing any of its functions.
use Carp ();
# Import the generic_option and strip_tags helpers from WWW::Search.
use WWW::Search(qw(generic_option strip_tags));
# Result objects are created with WWW::SearchResult.
require WWW::SearchResult;
# Prepare the object for a fresh run of headline retrieval.
#
# Arguments:
#   $self               - the search object being configured.
#   $native_query       - accepted for interface compatibility; this sub
#                         never reads it.
#   $native_options_ref - optional hash ref of options. Every key is copied
#                         into $self->{_options}; 'search_debug' and
#                         'search_parse_debug' additionally set the debug
#                         level, and 'search_url' overrides the start page.
#
# Side effects: sets _debug, agent_e_mail, _next_to_retrieve, _num_hits,
# _options and _next_url on $self, and calls $self->user_agent('user').
#
# Returns the URL stored in $self->{_next_url}.
sub native_setup_search {
    my ($self, $native_query, $native_options_ref) = @_;

    # A missing options hash is treated as an empty one.
    $native_options_ref = {} if !defined($native_options_ref);

    # Debug level: 'search_parse_debug' forces level 2, otherwise use
    # 'search_debug', falling back to 0 when neither is supplied.
    my $debug = $native_options_ref->{'search_debug'};
    $debug = 2 if $native_options_ref->{'search_parse_debug'};
    $debug = 0 if !defined($debug);
    $self->{_debug} = $debug;

    $self->{agent_e_mail} = 'jsmyser@bigfoot.com';
    $self->user_agent('user');
    $self->{_next_to_retrieve} = 1;
    $self->{'_num_hits'} = 0;

    # Install the default start page only if no options exist yet, so that
    # options kept on the object from an earlier call are preserved.
    if (!defined($self->{_options})) {
        $self->{_options} = {
            'search_url' => 'http://www.prnewswire.com/tnw/tnw.shtml',
        };
    }

    # Copy in new options; caller-supplied values win over stored ones.
    my $options_ref = $self->{_options};
    foreach my $name (keys %$native_options_ref) {
        $options_ref->{$name} = $native_options_ref->{$name};
    }

    # No query string is built: the start page is fetched exactly as given.
    $self->{_next_url} = $options_ref->{'search_url'};
    return $self->{_next_url};
}
# private
# Fetch the page at $self->{_next_url} and turn its headline entries into
# WWW::SearchResult objects appended to @{ $self->{cache} }.
#
# The page is scanned line by line with a small state machine:
#   HE (header) - nothing is collected until a <TITLE>...</TITLE> line is seen
#   HI (hits)   - expecting an <A HREF="..."> line (a new hit) or the
#                 "Click <A HREF=...><IMG SRC...>" link to the next page
#   TI (title)  - expecting the text closed by </A>, stored as the title
#   DE (desc)   - expecting a <DD> line, stored as the description
#
# Side effects: stores the HTTP response in $self->{response}; resets
# $self->{_next_url} and sets it again only when a next-page link is found.
#
# Returns the number of hits found on this page, or undef when there is no
# page left to fetch or the HTTP request was not successful.
sub native_retrieve_some {
    my ($self) = @_;
    print STDERR "**PRWire Get Latest**\n" if $self->{_debug};

    # Fast exit if already done:
    return undef if (!defined($self->{_next_url}));

    # If this is not the first page of results, sleep so as to not
    # overload the server.
    # NOTE(review): this sub never advances _next_to_retrieve, so unless
    # other code changes it the delay below does not fire -- confirm.
    $self->user_agent_delay if 1 < $self->{'_next_to_retrieve'};

    print STDERR "*Sending request (", $self->{_next_url}, ")\n" if $self->{_debug};
    my ($response) = $self->http_request('GET', $self->{_next_url});

    $self->{response} = $response;
    # On failure _next_url is left untouched.
    return undef if (!$response->is_success);

    # Cleared here; set again below only if the page links to a next page.
    $self->{'_next_url'} = undef;
    print STDERR "**Response\n" if $self->{_debug};

    # parse the output
    my ($HEADER, $HITS, $TITLE, $DESC) = qw(HE HI TI DE);
    my $hits_found = 0;
    my $state = $HEADER;
    my $hit;    # result currently being assembled, if any

    foreach my $line ($self->split_lines($response->content())) {
        next if $line =~ m@^$@;    # short circuit for blank lines
        print STDERR " $state ===$line=== " if 2 <= $self->{'_debug'};

        if ($line =~ m|<TITLE>.*?</TITLE>|i) {
            # The page title marks the end of the header section.
            $state = $HITS;
        }
        elsif ($state eq $HITS && $line =~ m@^<A HREF="(.*)">@i) {
            my $url = $1;
            # Debug output goes to STDERR like every other debug message,
            # so it cannot end up mixed into the caller's STDOUT.
            print STDERR "**Found Hit URL**\n" if 2 <= $self->{_debug};

            # A new hit starts: flush the previous one to the cache first.
            push(@{$self->{cache}}, $hit) if defined($hit);

            $hit = WWW::SearchResult->new;
            $hits_found++;
            # Links on the page are site-relative; make them absolute.
            $hit->add_url("http://www.prnewswire.com" . $url);
            $state = $TITLE;
        }
        elsif ($state eq $TITLE && $line =~ m|^(.+)</A>|i) {
            $hit->title($1);
            $state = $DESC;
        }
        elsif ($state eq $DESC && $line =~ m|^<DD>(.+)|i) {
            $hit->description($1);
            $state = $HITS;
        }
        elsif ($state eq $HITS && $line =~ m|Click\s<A HREF="(.*?)">.*?<IMG SRC.*?>|i) {
            # Link to the next page of headlines (kept in a lexical so no
            # package global is created).
            my $next_path = $1;
            $self->{'_next_url'} = 'http://www.prnewswire.com' . $next_path;
            print STDERR " **Next Tag is: ", $self->{'_next_url'}, "\n" if 2 <= $self->{_debug};
        }
        else {
            print STDERR "**Nothing matched.**\n" if 2 <= $self->{_debug};
        }
    }

    # Flush the last hit, which no later <A HREF> line pushed for us.
    push(@{$self->{cache}}, $hit) if defined($hit);

    return $hits_found;
}
1;
	Global
`s`	Focus search bar
`?`	Bring up this help dialog
	GitHub
`g` `p`	Go to pull requests
`g` `i`	go to github issues (only if github is preferred repository)
	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse
	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)