use 5.008005;
use strict;
use utf8;
new => 0,
ro => [qw/agent source_url workdir filename/],
);
use Carp;
use Guard;
use File::Temp 'tempdir';
use Furl;
use URI;
our $VERSION = "20131201";
# Constructor.
#
# Options are passed as a flat hash:
#   source_url - URL of the MS-Word file (required).
#   agent      - HTTP client; defaults to Furl->new(agent => "<class>/<VERSION>").
#   workdir    - directory for the downloaded file; defaults to a File::Temp
#                tempdir created with CLEANUP => 1.
# "filename" is always derived from the last path segment of source_url and
# overrides any caller-supplied value.
#
# Croaks when source_url is missing or empty.
sub new {
    my ($class, %params) = @_;

    # Without a URL there is nothing to fetch and no file name to derive, so
    # fail here instead of later with an unrelated open() error on workdir.
    croak('source_url is required')
        unless defined $params{source_url} && length $params{source_url};

    $params{agent}   ||= Furl->new(agent => sprintf('%s/%s', $class, $VERSION));
    $params{workdir} ||= tempdir(CLEANUP => 1);

    # Load File::Basename here and call fileparse by its full name so the
    # constructor does not rely on an import made elsewhere in the file.
    require File::Basename;
    my ($basename) = File::Basename::fileparse(URI->new($params{source_url})->path);
    $params{filename} = $basename;

    return bless { %params }, $class;
}
# Returns the path of the local copy of the document: the object's
# filename placed inside its workdir.
sub docfile {
    my ($self) = @_;
    my @parts = ($self->workdir, $self->filename);
    return File::Spec->catfile(@parts);
}
# Requests source_url through the configured agent and returns the
# response object. Croaks with the response's status line when the
# request was not successful.
sub fetch {
    my ($self) = @_;
    my $response = $self->agent->get($self->source_url);
    return $response if $response->is_success;
    croak($response->status_line);
}
# Writes $content (to_tsv passes the HTTP response body) to the path given
# by docfile() and returns that path.
#
# Croaks if the data cannot be written or the file cannot be closed.
sub _save_docfile {
    my ($self, $content) = @_;
    my $docfile = $self->docfile;
    my ($fh, $guard) = $self->_openfile('>', $docfile);

    # The document is binary; without binmode a CRLF-translating platform
    # (or a default text layer) would corrupt it.
    binmode $fh;

    print {$fh} $content
        or croak(sprintf('%s - %s', $docfile, $!));

    # Close explicitly: buffered write errors only surface at close time and
    # would be lost if closing were left to the guard.
    close $fh
        or croak(sprintf('%s - %s', $docfile, $!));

    # The handle is already closed, so the guard has nothing left to do.
    $guard->cancel;

    return $docfile;
}
# Opens $path with the given open() mode and returns the pair
# (filehandle, guard); the guard closes the handle when it is destroyed.
# Croaks with "<path> - <OS error>" when the file cannot be opened.
sub _openfile {
    my ($self, $mode, $path) = @_;

    # Skip one extra frame so the error is reported one level further up
    # the call stack than Carp's default.
    local $Carp::CarpLevel = 1;

    my $handle;
    unless (open $handle, $mode, $path) {
        croak(sprintf('%s - %s', $path, $!));
    }

    my $closer = guard { close $handle };
    return ($handle, $closer);
}
# Downloads the document, extracts its body text with Text::Extract::Word
# and returns it as tab-separated text with one record per line.
#
# $separator (optional) replaces every tab in the result; when it is
# omitted, empty, or already a tab, the text stays tab-separated.
#
# Croaks if the download fails or the local file cannot be opened.
sub to_tsv {
    my ($self, $separator) = @_;

    my $res  = $self->fetch;
    my $file = $self->_save_docfile($res->content);
    my ($fh, $guard) = $self->_openfile('<', $file);
    binmode $fh;    ### the Word document is binary data

    ### load the extractor here so this method does not rely on an import
    ### made elsewhere in the file
    require Text::Extract::Word;
    my $doc  = Text::Extract::Word->new($fh);
    my $text = $doc->get_body;

    ### add newline to tail of each records
    $text =~ s/(C?D?E)(?:\s+?)(\d)/$1\n$2/g;
    ### separate header and first record
    $text =~ s/市内局番\t\t/市内局番\n/;
    ### remove white line
    $text =~ s/\n+/\n/g;

    ### suppress strange newline
    $text =~ s/\n([0-9]+)\n/\n$1/g;
    ### Join ")" + newline + "(". The parentheses must be escaped or put in a
    ### character class to be a valid pattern; full-width parentheses
    ### (U+FF09 / U+FF08) are accepted too.
    ### NOTE(review): confirm which form the document actually uses.
    $text =~ s/([)\x{FF09}])\n([(\x{FF08}])/$1$2/g;

    ### test definedness and length, not truthiness, so a separator such as
    ### "0" is honoured
    if (defined $separator && length $separator && $separator ne "\t") {
        $text =~ s/\t/$separator/g;
    }

    return $text;
}
1;
__END__
=encoding utf-8
=head1 NAME
Number::Phone::JP::AreaCode::MasterData::Word2TSV - A helper class that extracts area code master data from an MS-Word file distributed by www.soumu.go.jp
=head1 SYNOPSIS
use Number::Phone::JP::AreaCode::MasterData::Word2TSV;
my $obj = Number::Phone::JP::AreaCode::MasterData::Word2TSV->new( %options );
my $tsv_str = $obj->to_tsv( $separator );
=head1 DESCRIPTION
Number::Phone::JP::AreaCode::MasterData::Word2TSV helps you fetch an MS-Word file from http://www.soumu.go.jp/main_sosiki/joho_tsusin/top/tel_number/shigai_list.html and export its contents as TSV.
=head1 METHODS
=head2 new
A Constructor Method.
You may pass the following options as a hash.
=over 4
=item agent
HTTP Client.
Default is an instance of L<Furl>.
=item source_url
URL for MS-Word file.
=item workdir
Working directory for temporary files (e.g. the saved MS-Word file).
Defaults to a C<tempdir> from L<File::Temp> created with C<< CLEANUP => 1 >>.
=back
=head2 to_tsv
Returns the area code master data formatted as TSV.
You may specify a separator as the first argument. The default is a hard tab (\t).
=head1 LICENSE
Copyright (C) ytnobody.
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.
=head1 AUTHOR
ytnobody E<lt>ytnobody@gmail.comE<gt>
=cut