package Geo::OSM::MapFeatures;
use warnings;
use strict;
use utf8;
use Data::Dumper;
use Error;
use HTML::TableExtract qw(tree);
use LWP::UserAgent;
use URI::Escape qw(uri_escape);
use Storable;
use XML::Simple;
use base qw(Class::Accessor);
__PACKAGE__->mk_accessors(qw(api_url mapfeatures_pagename trace));
use Geo::OSM::MapFeatures::Feature;
=head1 NAME
Geo::OSM::MapFeatures - Parses and represents OpenStreetMap Map Features
=head1 VERSION
Version 0.10
=cut
our $VERSION = '0.10';
=head1 SYNOPSIS
use Geo::OSM::MapFeatures;
my $mf = new Geo::OSM::MapFeatures;
$mf->download();
$mf->parse();
# To print a simple ascii representation:
foreach my $category ( sort( $mf->categories() ) ){
print "\n\n===== $category =====\n";
foreach my $feature ( $mf->features($category) ){
print "$feature\n";
}
}
# Or you can choose not to use the string overloading and get the
# individual elements yourself:
foreach my $category ( sort( $mf->categories() ) ){
print "\n\n===== $category =====\n";
foreach my $feature ( $mf->features($category) ){
print "Key: ".$feature->key()."\n";
print "Value(s): ". join("\n ", @{$feature->values()})."\n";
print "Description: ".$feature->description()."\n\n";
}
}
=head1 FUNCTIONS
=head2 new (constructor)
Create a new instance of this class. Pass parameters as a hashref.
Parameters:
=over 8
=item page
What page to fetch. Defaults to "Map_Features".
Translated pages work if the table header names are recognized, the module
contains a mapping table with translated names in over a dozen languages.
=back
Returns: new instance of this class.
=cut
sub new {
my ( $pkg, $params ) = @_;
my $class = ref $pkg || $pkg;
my $self = bless( {}, $class);
if( $$params{page} ){
$self->mapfeatures_pagename($$params{page});
} else {
$self->mapfeatures_pagename("Map_Features");
}
$self->api_url("http://wiki.openstreetmap.org/api.php");
my %tableheader_translations = (
'En' => {
key => 'key',
value => 'value',
element => 'element',
comment => 'comment',
},
De => {
key => 'Schlüssel',
value => 'Wert',
element => 'Element',
comment => 'Kommentar',
},
ES => {
key => 'Clave',
value => 'Valor',
element => 'Elemento',
comment => 'Comentario',
},
FR => {
key => 'Clé',
value => 'Valeur',
element => 'Élément',
comment => 'Commentaire',
},
IT => {
key => 'Chiave',
value => 'Valore',
element => 'Elemento',
comment => 'Spiegazione',
},
Ja => {
key => 'キー',
value => '値',
element => '要素',
comment => '説明',
},
Hu => {
key => 'Kulcs',
value => 'Érték',
element => 'Alapelem',
comment => 'Magyarázat',
},
Pt => {
key => 'Chave',
value => 'Valor',
element => 'Element',
comment => 'Comentários',
},
Ro => {
key => 'Cheie',
value => 'Valoare',
element => 'Element',
comment => 'Descriere',
},
RU => {
key => 'Ключ',
value => 'Значение',
element => 'Элементы',
comment => 'Описание',
},
SK => {
key => 'Klúč',
value => 'Hodnota',
element => 'Element',
comment => 'Komentár',
},
Sv => {
key => 'Nyckelord',
value => 'Värde',
element => 'Element',
comment => 'Kommentar',
},
Tr => {
key => 'Anahtar',
value => 'Değer',
element => 'Öğe',
comment => 'Açıklama',
},
Lt => {
key => 'Kategorija',
value => 'Kodas',
element => 'Įvedimo būdai',
comment => 'Aprašymas',
},
Uk => {
key => 'Ключ',
value => 'Значення',
element => 'Елемент',
comment => 'Пояснення',
},
Traditional_Chinese => {
key => '類別',
value => '值',
element => '元素',
comment => '說明',
},
);
# Build and compile regexes with all translations
foreach my $string ( qw(key value element comment) ){
my @translations = ();
foreach my $language ( values(%tableheader_translations) ){
push(@translations, $$language{$string});
}
my $regex_string = join('|', @translations);
$self->{tableheader_translation_regexes}{$string} = qr/$regex_string/i;
}
return $self;
}
=head2 download
Downloads Map Features from wiki.openstreetmap.org.
Throws exceptions if something goes wrong.
Returns: undef
=cut
sub download {
my $self = shift;
# Setup HTTP useragent
my $ua = LWP::UserAgent->new;
$ua->agent("Geo_OSM_MapFeatures/$Geo::OSM::MapFeatures::VERSION");
# Fetch MW parser output of page
my $req = HTTP::Request->new(GET => sprintf("%s?action=parse&prop=text&format=xml&page=%s", $self->api_url, $self->mapfeatures_pagename));
warn "Fetching ".$req->uri."\n" if $self->trace();
my $res = $ua->request($req);
if( ! $res->is_success ){
throw Geo::OSM::MapFeatures::Error::Network(sprintf("Couldn't fetch %s: %s", $req->uri, $res->status_line));
}
$self->{content} = XMLin($res->content);
}
=pod
=head2 debug_download
Download and cache in "mapfeatures.debug" in the current directory, to avoid
downloading the page again and again when developing.
For example do something like the following:
unless( $ENV{MAPFEATURESDEBUG} ){
$mf->download();
} else {
$mf->debug_download();
}
=cut
sub debug_download {
my $self = shift;
if( -f 'mapfeatures.debug' ){
my $data = retrieve('mapfeatures.debug') or die;
$$self{content} = $$data{content};
} else {
$self->download();
my $data = {content => $$self{content}};
store($data, 'mapfeatures.debug') or die;
}
}
=head2 parse
Parses map features.
=cut
sub parse {
my $self = shift;
throw Geo::OSM::MapFeatures::Error("No content, is it downloaded?")
unless $self->{content};
throw Geo::OSM::MapFeatures::Error("Couldn't find <parse><text> element, something wrong with api.php?")
unless $self->{content}{parse}{text};
my %data;
my $tableextractor = HTML::TableExtract->new(
# Get header translation regexes with a hash slice
headers => [ @{$self->{tableheader_translation_regexes}}{qw(key value element comment)} ],
);
$tableextractor->parse($$self{content}{parse}{text});
if( $tableextractor->tables == 0 ){
throw Geo::OSM::MapFeatures::Error::Parse("Did not find any tables");
}
#DEBUG: $tableextractor->tree->dump;
# Examine all matching tables
foreach my $table ($tableextractor->tables) {
# Find headings before the table but at the same level.
# Loop through in reverse and find the first of each
# heading level upwards
my @headings = ();
my $lowestheading = 10;
foreach my $heading_elem ( reverse( grep { $_->tag() =~ /^h(?:\d)$/ } $table->tree->left ) ){
my( $headinglevel ) = $heading_elem->tag() =~ /^h(\d)$/;
# Only store the first for a particular level
next if defined($headings[$headinglevel]);
# Don't store a small heading if we already saw something
# larger. For example if we first saw h2 then h3 the h3
# belongs to the previous h2, not this one.
next if $#headings && $headinglevel > $lowestheading;
$lowestheading = $headinglevel;
$headings[$headinglevel] = $heading_elem->as_trimmed_text;
}
@headings = grep { defined } @headings;
my $have_added_in_table_heading;
foreach my $row ($table->rows) {
# If the first column is spanned it's probably a heading
# dividing the table in parts.
# Make sure to push exactly the last one of these onto
# the list of headings
if( $$row[0]->attr('colspan') ){
pop(@headings) if $have_added_in_table_heading;
push(@headings, $$row[0]->as_trimmed_text);
$have_added_in_table_heading++;
next;
}
my $key = $$row[0];
my $value = $$row[1];
my $element = $$row[2];
my $description = $$row[3];
$key = $key->as_trimmed_text;
# Elements are given by images with filenames Mf_(node|way|area).png.
# This regex intentionally matches more, to make sure the module can detect that wikifiddlers have "invented" another element type or something
my @elementtypes = map { $_->attr('src') =~ /Mf_(\w+)\./ } $element->find('img');
# Find values and split, either by <li> elements or by various text separators
my @values;
if( $value->find('li') ){
@values = map { $_->as_trimmed_text } $value->find('li');
} else {
# Split on "/" (except for 24/7), "or" and "|"
@values = split( m{\s*(?:(?<!24)/(?!7)|\bor\b|\|)\s*}, $value->as_trimmed_text );
}
$description = $description->as_trimmed_text;
#DEBUG: print "Row: k='$key' v='".join("','",@values)."' e='".join("','",@elementtypes)."' c='$description'\n";
my $feature = new Geo::OSM::MapFeatures::Feature($key, \@values, \@elementtypes, $description);
#FIXME: There should be a real hierarchy, not just a category made by concatenating headings
my $headingstring = join(' / ', @headings);
push(@{$self->{features}{$headingstring}}, $feature);
}
}
}
=head2 categories
Returns a list of feature categories.
=cut
sub categories {
my $self = shift;
return keys( %{ $self->{features} } );
}
=head2 features
Returns a list of features.
If given an argument it as taken as a category, and only features
in that category will be returned.
=cut
sub features {
my $self = shift;
my $category = shift;
if( defined($category) ){
return @{ $self->{features}{$category} };
} else {
my @result = ();
foreach my $category ( $self->categories() ){
push(@result, $self->features($category));
}
return @result;
}
}
=head1 Exception classes
=head2 Geo::OSM::MapFeatures::Error
Base exception class for errors thrown by this module
=cut
package Geo::OSM::MapFeatures::Error;
use base qw(Error);
sub new {
my $self = shift;
my $text = "" . shift;
my $params = shift;
local $Error::Depth = $Error::Depth + 1;
$self->SUPER::new(-text => $text, %$params);
}
sub stringify {
my $self = shift;
my $text = $self->SUPER::stringify;
$text .= sprintf(" at %s line %d.\n", $self->file, $self->line)
unless($text =~ /\n$/s);
$text;
}
=head2 Geo::OSM::MapFeatures::Error::Network
Network error
=cut
package Geo::OSM::MapFeatures::Error::Network;
our @ISA = qw(Geo::OSM::MapFeatures::Error);
=head2 Geo::OSM::MapFeatures::Error::Parse
Go find out who broke map feature this time...
=cut
package Geo::OSM::MapFeatures::Error::Parse;
our @ISA = qw(Geo::OSM::MapFeatures::Error);
=head1 AUTHOR
Knut Arne Bjørndal, C<< <bob at cakebox.net> >>
=head1 BUGS
Categories are currently made by concatenating headings above a feature. This should probably be a proper hierarchy instead.
The table header translation table should probably be easier to patch from programs calling the module. Or maybe even downloaded from the wiki or something.
Please report any bugs or feature requests to C<bug-geo-osm-mapfeatures at rt.cpan.org>, or through
the web interface at L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Geo-OSM-MapFeatures>. I will be notified, and then you'll
automatically be notified of progress on your bug as I make changes.
=head1 SUPPORT
You can find documentation for this module with the perldoc command.
perldoc Geo::OSM::MapFeatures
You can also look for information at:
=over 4
=item * RT: CPAN's request tracker
L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=Geo-OSM-MapFeatures>
=item * AnnoCPAN: Annotated CPAN documentation
L<http://annocpan.org/dist/Geo-OSM-MapFeatures>
=item * CPAN Ratings
L<http://cpanratings.perl.org/d/Geo-OSM-MapFeatures>
=item * Search CPAN
L<http://search.cpan.org/dist/Geo-OSM-MapFeatures>
=back
=head1 ACKNOWLEDGEMENTS
=head1 COPYRIGHT & LICENSE
Copyright 2008-2009 Knut Arne Bjørndal, all rights reserved.
This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.
=cut
1; # End of Geo::OSM::MapFeatures