package XML::Feed; use strict; use warnings; use v5.10; use base qw( Class::ErrorHandler ); use Feed::Find; use URI::Fetch; use LWP::UserAgent; use Carp; use Scalar::Util 'blessed'; use Module::Pluggable search_path => "XML::Feed::Format", require => 1, sub_name => 'formatters'; our $VERSION = '0.63'; our $MULTIPLE_ENCLOSURES = 0; our @formatters; BEGIN { @formatters = __PACKAGE__->formatters; } sub new { my $class = shift; my $format = shift // 'Atom'; my $format_class = 'XML::Feed::Format::' . $format; eval "use $format_class"; Carp::croak("Unsupported format $format: $@") if $@; my $feed = bless {}, join('::', __PACKAGE__, "Format", $format); $feed->init_empty(@_) or return $class->error($feed->errstr); $feed; } sub init_empty { 1 } sub parse { my $class = shift; my($stream, $specified_format) = @_; return $class->error("Stream parameter is required") unless $stream; my $feed = bless {}, $class; my $xml = ''; if (blessed($stream) and $stream->isa('URI')) { $xml = $class->get_uri($stream); } elsif (ref($stream) eq 'SCALAR') { $xml = $$stream; } elsif (ref($stream)) { $xml = $class->get_fh($stream); } else { $xml = $class->get_file($stream); } return $class->error("Can't get feed XML content from $stream") unless $xml; my $format; if ($specified_format) { $format = $specified_format; } else { $format = $feed->identify_format(\$xml) or return $class->error($feed->errstr); } my $format_class = join '::', __PACKAGE__, "Format", $format; eval "use $format_class"; return $class->error("Unsupported format $format: $@") if $@; bless $feed, $format_class; $feed->init_string(\$xml) or return $class->error($feed->errstr); $feed; } sub get_file { my $class = shift; my ($filename) = @_; open my $fh, '<', $filename or return $class->error("Can't open $filename: $!"); my $xml = $class->get_fh($fh); close $fh; return $xml; } sub get_fh { my $class = shift; my ($fh) = @_; my $xml; while (read $fh, my($chunk), 8192) { $xml .= $chunk; } return $xml; } sub get_uri { my $class = shift; my ($stream) = @_; my $ua = LWP::UserAgent->new; $ua->agent(__PACKAGE__ . "/$VERSION"); $ua->env_proxy; # force allowing of proxies my $res = URI::Fetch->fetch($stream, UserAgent => $ua) or return $class->error(URI::Fetch->errstr); return $class->error("This feed has been permanently removed") if $res->status == URI::Fetch::URI_GONE(); return $res->content; } sub identify_format { my $feed = shift; my($xml) = @_; foreach my $class (@formatters) { my ($name) = ($class =~ m!([^:]+)$!); # TODO ugly my $tmp = $$xml; return $name if eval { $class->identify(\$tmp) }; return $feed->error($@) if $@; } return $feed->error("Cannot detect feed type"); } sub _get_first_tag { my $class = shift; my ($xml) = @_; ## Auto-detect feed type based on first element. This is prone ## to breakage, but then again we don't want to parse the whole ## feed ourselves. my $tag; while ($$xml =~ /<(\S+)/sg) { (my $t = $1) =~ tr/a-zA-Z0-9:\-\?!//cd; my $first = substr $t, 0, 1; $tag = $t, last unless $first eq '?' || $first eq '!'; } die ("Cannot find first element") unless $tag; $tag =~ s/^.*://; return $tag; } sub find_feeds { my $class = shift; my($uri) = @_; my @feeds = Feed::Find->find($uri) or return $class->error(Feed::Find->errstr); @feeds; } sub convert { my $feed = shift; my($format) = @_; my $new = XML::Feed->new($format); for my $field (qw( title link description language author copyright modified generator )) { my $val = $feed->$field(); next unless defined $val; $new->$field($val); } for my $entry ($feed->entries) { $new->add_entry($entry->convert($format)); } $new; } sub splice { my $feed = shift; my($other) = @_; my %ids = map { $_->id => 1 } $feed->entries; for my $entry ($other->entries) { $feed->add_entry($entry) unless $ids{$entry->id}++; } } sub _convert_entry { my $feed = shift; my $entry = shift; my $feed_format = ref($feed); $feed_format =~ s!^XML::Feed::Format::!!; my $entry_format = ref($entry); $entry_format =~ s!^XML::Feed::Entry::Format::!!; return $entry if $entry_format eq $feed_format; return $entry->convert($feed_format); } sub base; sub format; sub title; sub link; sub self_link; sub description; sub language; sub author; sub copyright; sub modified; sub generator; sub add_entry; sub entries; sub as_xml; sub id; sub image; sub tagline { shift->description(@_) } sub items { $_[0]->entries } # RFC 5005 sub first_link; sub last_link; sub previous_link; sub next_link; sub current_link; sub prev_archive_link; sub next_archive_link; 1; __END__ =head1 NAME XML::Feed - Syndication feed parser and auto-discovery =head1 SYNOPSIS use XML::Feed; my $feed = XML::Feed->parse(URI->new('http://example.com/atom.xml')) or die XML::Feed->errstr; print $feed->title, "\n"; for my $entry ($feed->entries) { } ## Find all of the syndication feeds on a given page, using ## auto-discovery. my @feeds = XML::Feed->find_feeds('http://example.com/'); =head1 DESCRIPTION I is a syndication feed parser for both RSS and Atom feeds. It also implements feed auto-discovery for finding feeds, given a URI. I supports the following syndication feed formats: =over 4 =item * RSS 0.91 =item * RSS 1.0 =item * RSS 2.0 =item * Atom =back The goal of I is to provide a unified API for parsing and using the various syndication formats. The different flavors of RSS and Atom handle data in different ways: date handling; summaries and content; escaping and quoting; etc. This module attempts to remove those differences by providing a wrapper around the formats and the classes implementing those formats (L and L). For example, dates are handled differently in each of the above formats. To provide a unified API for date handling, I converts all date formats transparently into L objects, which it then returns to the caller. =head1 USAGE =head2 XML::Feed->new($format) Creates a new empty I object using the format I<$format>. $feed = XML::Feed->new('Atom'); $feed = XML::Feed->new('RSS'); $feed = XML::Feed->new('RSS', version => '0.91'); =head2 XML::Feed->parse($stream) =head2 XML::Feed->parse($stream, $format) Parses a syndication feed identified by I<$stream> and returns an I object. I<$stream> can be any one of the following: =over 4 =item * Scalar reference A reference to string containing the XML body of the feed. =item * Filehandle An open filehandle from which the feed XML will be read. =item * File name The name of a file containing the feed XML. =item * URI object A URI from which the feed XML will be retrieved. =back I<$format> allows you to override format guessing. =head2 XML::Feed->get_file($filename) Gets a feed from a file. =head3 CML::Feed->get_fh($fh) Gets a feed from pre-opened filehandle. =head2 XML::Feed->get_uri($uri) Gets a feed from a URI. =head2 XML::Feed->find_feeds($uri) Given a URI I<$uri>, use auto-discovery to find all of the feeds linked from that page (using IlinkE> tags). Returns a list of feed URIs. =head2 XML::Feed->identify_format(\$xml) Given the xml of a feed return what format it is in, with C or C for all versions of RSS. Note that you pass in a scalar ref to the xml string. =head2 $feed->convert($format) Converts the I object into the I<$format> format, and returns the new object. =head2 $feed->splice($other_feed) Splices in all of the entries from the feed I<$other_feed> into I<$feed>, skipping posts that are already in I<$feed>. =head2 $feed->format Returns the format of the feed (C, or some version of C). =head2 $feed->title([ $title ]) The title of the feed/channel. =head2 $feed->base([ $base ]) The url base of the feed/channel. =head2 $feed->link([ $uri ]) The permalink of the feed/channel. =head2 $feed->tagline([ $tagline ]) The description or tagline of the feed/channel. =head2 $feed->description([ $description ]) Alias for I<$feed-Etagline>. =head2 $feed->author([ $author ]) The author of the feed/channel. =head2 $feed->language([ $language ]) The language of the feed. =head2 $feed->copyright([ $copyright ]) The copyright notice of the feed. =head2 $feed->modified([ $modified ]) A I object representing the last-modified date of the feed. If present, I<$modified> should be a I object. =head2 $feed->generator([ $generator ]) The generator of the feed. =head2 $feed->self_link ([ $uri ]) The Atom Self-link of the feed: L A string. =head2 $feed->entries A list of the entries/items in the feed. Returns an array containing L objects. =head2 $feed->items A synonym (alias) for C<$feed-Eentries>. =head2 $feed->add_entry($entry) Adds an entry to the feed. I<$entry> should be an L object in the correct format for the feed. =head2 $feed->as_xml Returns an XML representation of the feed, in the format determined by the current format of the I<$feed> object. =head2 $feed->first_link ([ $uri ]) The Atom First-link for feed paging and archiving (RFC 5005). L =head2 $feed->last_link ([ $uri ]) The Atom Last-link for feed paging and archiving. =head2 $feed->next_link ([ $uri ]) The Atom Next-link for feed paging and archiving. =head2 $feed->previous_link ([ $uri ]) The Atom Previous-link for feed paging and archiving. =head2 $feed->current_link ([ $uri ]) The Atom Current-link for feed paging and archiving. =head2 $feed->next_archive_link ([ $uri ]) The Atom Next-link for feed paging and archiving. =head2 $feed->prev_archive_link ([ $uri ]) The Atom Prev-Archive-link for feed paging and archiving. =head1 PACKAGE VARIABLES =over 4 =item C<$XML::Feed::Format::RSS::PREFERRED_PARSER> If you want to use another RSS parser class than XML::RSS (default), you can change the class by setting C<$PREFERRED_PARSER> variable in the XML::Feed::Format::RSS package. $XML::Feed::Format::RSS::PREFERRED_PARSER = "XML::RSS::LibXML"; B this will only work for parsing feeds, not creating feeds. B Only C version 0.3004 is known to work at the moment. =item C<$XML::Feed::MULTIPLE_ENCLOSURES> Although the RSS specification states that there can be at most one enclosure per item some feeds break this rule. If this variable is set then C captures all of them and makes them available as a list. Otherwise it returns the last enclosure parsed. B C version 1.44 is needed for this to work. =back =cut =head1 VALID FEEDS For reference, this cgi script will create valid, albeit nonsensical feeds (according to C anyway) for Atom 1.0 and RSS 0.90, 0.91, 1.0 and 2.0. #!perl -w use strict; use CGI; use CGI::Carp qw(fatalsToBrowser); use DateTime; use XML::Feed; my $cgi = CGI->new; my @args = ( $cgi->param('format') // "Atom" ); push @args, ( version => $cgi->param('version') ) if $cgi->param('version'); my $feed = XML::Feed->new(@args); $feed->id("http://".time.rand()."/"); $feed->title('Test Feed'); $feed->link($cgi->url); $feed->self_link($cgi->url( -query => 1, -full => 1, -rewrite => 1) ); $feed->modified(DateTime->now); my $entry = XML::Feed::Entry->new(); $entry->id("http://".time.rand()."/"); $entry->link("http://example.com"); $entry->title("Test entry"); $entry->summary("Test summary"); $entry->content("Foo"); $entry->modified(DateTime->now); $entry->author('test@example.com (Testy McTesterson)'); $feed->add_entry($entry); my $mime = ("Atom" eq $feed->format) ? "application/atom+xml" : "application/rss+xml"; print $cgi->header($mime); print $feed->as_xml; =head1 LICENSE I is free software; you may redistribute it and/or modify it under the same terms as Perl itself. =head1 AUTHOR & COPYRIGHT Except where otherwise noted, I is Copyright 2004-2008 Six Apart. All rights reserved. =head1 SUPPORT For support contact the XML::Feed mailing list - xml-feed@perlhacks.com. =head1 SOURCE CODE The latest version of I can be found at http://github.com/davorg/XML-Feed =cut