use 5.006;
use strict;
use warnings FATAL => 'all';
use Fcntl();
use English qw( -no_match_vars );
use Carp();
use IO::File();
sub _MAGIC_NUMBER_BUFFER_SIZE { return 2 } # for .zip and .gz files
sub _GRANDPARENT_INDEX { return -2 }
sub _PARENT_INDEX { return -1 }
sub _EXCEL_COLUMN_RADIX { return 26 }
sub _BUFFER_SIZE { return 4096 }
our $VERSION = '0.13';
sub new {
my ( $class, $params ) = @_;
my $self = {};
if ( defined $params->{worksheet_name} ) {
$self->{worksheet_name} = $params->{worksheet_name};
}
elsif ( defined $params->{worksheet_number} ) {
if ( $params->{worksheet_number} =~ /^\d+$/smx ) {
$self->{worksheet_number} = $params->{worksheet_number};
}
else {
Carp::croak('worksheet_number parameter is not a number');
}
}
else {
$self->{worksheet_number} = 1;
}
delete $params->{worksheet_number};
delete $params->{worksheet_name};
if ( !exists $params->{binary} ) {
$params->{binary} = 1;
}
$self->{csv} = Text::CSV_XS->new($params);
bless $self, $class;
return $self;
}
sub getline {
my ( $self, $handle ) = @_;
if ( ( defined $self->{handle} ) && ( $self->{handle} eq $handle ) ) {
}
else {
$self->{eof} = q[];
if ( $self->_setup_handle($handle) ) {
}
else {
return;
}
}
my $row = $self->{cells}->[ $self->{row_index}++ ];
if ( !defined $row ) {
$self->{eof} = 1;
}
return $row;
}
sub eof {
my ($self) = @_;
return $self->{eof};
}
sub error_diag {
my ($self) = @_;
return $self->{_ERROR_DIAG};
}
sub _stuff_input_into_tmp_file {
my ( $self, $input_handle ) = @_;
seek $input_handle, 0, Fcntl::SEEK_SET()
or
Carp::croak("Failed to seek to start of filehandle:$EXTENDED_OS_ERROR");
my $handle = IO::File->new_tmpfile()
or Carp::croak("Failed to create temporary file:$EXTENDED_OS_ERROR");
my $result;
while ( $result = read $input_handle, my $buffer, _BUFFER_SIZE() ) {
print {$handle} $buffer
or
Carp::croak("Failed to write to temporary file:$EXTENDED_OS_ERROR");
}
defined $result
or Carp::croak "Failed to read from input file:$EXTENDED_OS_ERROR";
seek $handle, 0, Fcntl::SEEK_SET()
or
Carp::croak("Failed to seek to start of filehandle:$EXTENDED_OS_ERROR");
seek $input_handle, 0, Fcntl::SEEK_SET()
or
Carp::croak("Failed to seek to start of filehandle:$EXTENDED_OS_ERROR");
return $handle;
}
sub _xls_parser {
my ($self) = @_;
my $parser = Spreadsheet::ParseExcel->new(
CellHandler => sub {
my ( $workbook, $sheet_index, $row, $col, $cell ) = @_;
my $worksheet = $workbook->worksheet($sheet_index);
my $process_worksheet = 0;
if ( ( defined $self->{worksheet_name} )
&& ( $self->{worksheet_name} eq $worksheet->get_name() ) )
{
$self->{xls_worksheet_found} = 1;
$process_worksheet = 1;
}
elsif (( $self->{worksheet_number} )
&& ( ( $self->{worksheet_number} - 1 ) == $sheet_index ) )
{
$self->{xls_worksheet_found} = 1;
$process_worksheet = 1;
}
if ($process_worksheet) {
$self->{cells}->[$row]->[$col] = $cell->{_Value};
}
},
NotSetCell => 1,
);
return $parser;
}
sub _setup_handle {
my ( $self, $input_handle ) = @_;
my $magic_bytes = $self->_sniff_magic_bytes($input_handle);
my $handle = $self->_stuff_input_into_tmp_file($input_handle);
my $parser = $self->_xls_parser();
if ( $magic_bytes =~ /^PK/smx ) {
if ( defined $self->_setup_zip($handle) ) {
$self->{handle} = $input_handle;
}
else {
return;
}
}
elsif ( $magic_bytes =~ /^\037\213/smx ) {
if ( $self->_setup_compress_zlib_spreadsheet($handle) ) {
$self->{handle} = $input_handle;
}
else {
return;
}
}
elsif ( ( defined $parser ) && ( my $workbook = $parser->parse($handle) ) )
{
if ( $self->_setup_xls_spreadsheet($workbook) ) {
$self->{handle} = $input_handle;
}
elsif ( !$self->{xls_worksheet_found} ) {
$self->{_ERROR_DIAG} = 'ENOENT - Worksheet '
. (
defined $self->{worksheet_name}
? $self->{worksheet_name}
: $self->{worksheet_number}
) . ' not found';
return;
}
else {
return;
}
}
else {
$handle = $self->_stuff_input_into_tmp_file($input_handle);
$self->{cells} = [];
while ( my $row = $self->{csv}->getline($handle) ) {
push @{ $self->{cells} }, $row;
}
if ( $self->{csv}->eof() ) {
$self->{content_type} = 'text/csv'; # according to RFC 4180
$self->{type} = 'csv';
$self->{row_index} = 0;
$self->{handle} = $input_handle;
}
else {
$self->{_ERROR_DIAG} = $self->{csv}->error_diag();
return;
}
}
return $self;
}
sub _setup_zip {
my ( $self, $handle ) = @_;
my $zip = Archive::Zip->new();
Archive::Zip::setErrorHandler( sub { chomp $_[0]; die "$_[0]\n"; } );
my $result;
my $zip_error = 'Corrupt ZIP file';
eval { $result = $zip->readFromFileHandle($handle); }
or do { chomp $EVAL_ERROR; $zip_error = $EVAL_ERROR };
if ( ( defined $result ) && ( $result == Archive::Zip::AZ_OK() ) ) {
}
else {
$self->{_ERROR_DIAG} = "ZIP - $zip_error";
return;
}
if ( $self->_parse_archive_zip_spreadsheet($zip) ) {
return 1;
}
else {
return;
}
}
sub _sniff_magic_bytes {
my ( $self, $handle ) = @_;
seek $handle, 0, Fcntl::SEEK_SET()
or
Carp::croak("Failed to seek to start of filehandle:$EXTENDED_OS_ERROR");
defined read $handle, my $magic_bytes, _MAGIC_NUMBER_BUFFER_SIZE()
or Carp::croak("Failed to read from filehandle:$EXTENDED_OS_ERROR");
seek $handle, 0, Fcntl::SEEK_SET()
or
Carp::croak("Failed to seek to start of filehandle:$EXTENDED_OS_ERROR");
return $magic_bytes;
}
sub _handle_workbook_type_zips {
my ( $self, $zip ) = @_;
my $shared_strings = $self->_xlsx_shared_strings($zip);
if ( !defined $shared_strings ) {
return;
}
my $worksheet_path = $self->_xlsx_worksheet_path($zip);
if ( ( defined $worksheet_path )
&& ( my $worksheet = $zip->memberNamed($worksheet_path) ) )
{
$self->{'content_type'} =
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet';
$self->{type} = 'xlsx';
$self->{zip} = $zip;
$self->{row_index} = 0;
my $content = $worksheet->contents();
my $cells =
$self->_xlsx_cells( $content, $shared_strings, $worksheet_path );
if ( defined $cells ) {
$self->{cells} = $cells;
return 1;
}
else {
return;
}
}
elsif ( defined $worksheet_path ) {
$self->{_ERROR_DIAG} =
q[ZIP - Missing '] . $worksheet_path . q[' file in .xlsx file];
return;
}
else {
return;
}
}
sub _handle_mimetype_type_zips {
my ( $self, $zip ) = @_;
my $member = $zip->memberNamed('mimetype');
my $content_type;
delete $self->{type};
if ( defined $member ) {
$content_type = $member->contents();
if ( $content_type eq 'application/vnd.oasis.opendocument.spreadsheet' )
{
$self->{type} = 'ods';
}
elsif ( $content_type eq 'application/vnd.sun.xml.calc' ) {
$self->{type} = 'sxc';
}
elsif ( $content_type eq 'application/x-kspread' ) {
$self->{type} = 'ksp';
}
}
if ( ( $self->{type} ) && ( $self->{type} eq 'ksp' ) ) {
$self->{content_type} = $content_type;
my $maindoc_member = $zip->memberNamed('maindoc.xml');
if ( defined $maindoc_member ) {
$self->{zip} = $zip;
$self->{row_index} = 0;
my $maindoc_data = $maindoc_member->contents();
my $cells = $self->_ksp_cells($maindoc_data);
if ( defined $cells ) {
$self->{cells} = $cells;
return 1;
}
else {
return;
}
}
else {
$self->{_ERROR_DIAG} =
q[ZIP - Missing 'content.xml' file in .]
. ( lc $self->{type} )
. q[ file];
return;
}
}
elsif ( $self->{type} ) {
$self->{content_type} = $content_type;
my $content_member = $zip->memberNamed('content.xml');
if ( defined $content_member ) {
$self->{zip} = $zip;
$self->{row_index} = 0;
my $content_data = $content_member->contents();
my $cells = $self->_ods_cells($content_data);
if ( defined $cells ) {
$self->{cells} = $cells;
return 1;
}
else {
return;
}
}
else {
$self->{_ERROR_DIAG} =
q[ZIP - Missing 'content.xml' file in .]
. ( lc $self->{type} )
. q[ file];
return;
}
}
else {
$self->{_ERROR_DIAG} =
q[ZIP - mimetype file does not contain any known MIME Types in OpenOffice document];
return;
}
}
sub _parse_archive_zip_spreadsheet {
my ( $self, $zip ) = @_;
if ( $zip->memberNamed('xl/workbook.xml') ) {
if ( !defined $self->_handle_workbook_type_zips($zip) ) {
return;
}
}
elsif ( $zip->memberNamed('mimetype') ) {
if ( !defined $self->_handle_mimetype_type_zips($zip) ) {
return;
}
}
else {
$self->{_ERROR_DIAG} =
q[ZIP - Missing any identifiable spreadsheet file in ZIP archive];
return;
}
return 1;
}
sub _setup_compress_zlib_spreadsheet {
my ( $self, $handle ) = @_;
seek $handle, 0, Fcntl::SEEK_SET()
or
Carp::croak("Failed to seek to start of filehandle:$EXTENDED_OS_ERROR");
my $contents;
my $gzip_error = 'Corrupt GZIP';
my $result = 0;
eval {
my $gz = Compress::Zlib::gzopen( $handle, 'rb' )
or die "Cannot open handle: $Compress::Zlib::gzerrno\n";
while ( $gz->gzread( my $uncompressed ) > 0 ) {
$contents .= $uncompressed;
}
if ( $gz->gzerror() != Compress::Zlib::Z_STREAM_END() ) {
die "Failed to read from handle:$Compress::Zlib::gzerrno\n";
}
$gz->gzclose()
and die "Failed to close compression:$Compress::Zlib::gzerrno\n";
$result = 1;
} or do {
chomp $EVAL_ERROR;
if ($EVAL_ERROR) {
$gzip_error = $EVAL_ERROR;
}
};
if ( $result == 0 ) {
$self->{_ERROR_DIAG} = q[GZIP - ] . $gzip_error;
return;
}
$self->{content_type} = 'application/x-gnumeric';
$self->{type} = 'gnumeric';
$self->{row_index} = 0;
my $cells = $self->_gnumeric_cells($contents);
if ( defined $cells ) {
$self->{cells} = $cells;
}
else {
return;
}
return 1;
}
sub _setup_xls_spreadsheet {
my ( $self, $workbook ) = @_;
my $worksheet;
if ( defined $self->{worksheet_name} ) {
$worksheet = $workbook->worksheet( $self->{worksheet_name} );
}
elsif ( $self->{worksheet_number} ) {
$worksheet = $workbook->worksheet( $self->{worksheet_number} - 1 );
}
else {
$worksheet = $workbook->worksheet(0);
}
if ( !defined $worksheet ) {
return;
}
$self->{content_type} = 'application/vnd.ms-excel';
$self->{type} = 'xls';
$self->{row_index} = 0;
return 1;
}
sub _process_worksheet {
my ( $self, $current_worksheet_name, $current_worksheet_index ) = @_;
my $process_worksheet = 0;
if ( ( defined $self->{worksheet_name} )
&& ( $self->{worksheet_name} eq $current_worksheet_name ) )
{
$process_worksheet = 1;
}
elsif (( $self->{worksheet_number} )
&& ( ( $self->{worksheet_number} - 1 ) == $current_worksheet_index ) )
{
$process_worksheet = 1;
}
return $process_worksheet;
}
sub _ksp_cells {
my ( $self, $maindoc_content ) = @_;
my $cells = [];
my $current_worksheet_name;
my $parameter_stack;
my $element_stack;
my $worksheet_count;
my $process_worksheet = 0;
my $xml = XML::Parser->new(
Handlers => {
Entity => sub {
die
"XML Entities have been detected and rejected in the XML, due to security concerns\n";
},
Start => sub {
my ( $expat, $element_name, %element_parameters ) = @_;
push @{$element_stack}, $element_name;
push @{$parameter_stack}, \%element_parameters;
if ( $element_name eq 'table' ) {
if ( defined $worksheet_count ) {
$worksheet_count += 1;
}
else {
$worksheet_count = 0;
}
$process_worksheet =
$self->_process_worksheet( $element_parameters{name},
$worksheet_count );
}
},
End => sub {
my ( $expat, $element_name ) = @_;
pop @{$parameter_stack};
if ( $element_name ne pop @{$element_stack} ) {
Carp::croak(
'Internal confusion in processing XML elements');
}
},
Char => sub {
my ( $expat, $content ) = @_;
if ($process_worksheet) {
if ( ( defined $element_stack->[ _GRANDPARENT_INDEX() ] )
&& (
$element_stack->[ _GRANDPARENT_INDEX() ] eq 'cell' )
&& ( $element_stack->[ _PARENT_INDEX() ] eq 'text' ) )
{
$cells->[ $parameter_stack->[ _GRANDPARENT_INDEX() ]
->{row} - 1 ]
->[ $parameter_stack->[ _GRANDPARENT_INDEX() ]
->{column} - 1 ] = $content;
}
}
}
}
);
eval { $xml->parse($maindoc_content); } or do {
chomp $EVAL_ERROR;
$EVAL_ERROR =~ s/^\s*//smx;
$self->{_ERROR_DIAG} = "XML - Invalid XML in maindoc.xml:$EVAL_ERROR";
return;
};
return $cells;
}
sub _ods_cells {
my ( $self, $ods_content ) = @_;
my $cells;
my $current_row;
my $current_column_number;
my $element_stack;
my $process_worksheet = 0;
my $worksheet_count;
my $xml = XML::Parser->new(
Handlers => {
Entity => sub {
die
"XML Entities have been detected and rejected in the XML, due to security concerns\n";
},
Start => sub {
my ( $expat, $element_name, %element_parameters ) = @_;
push @{$element_stack}, $element_name;
if ( $element_name eq 'table:table-row' ) {
$current_row = [];
$current_column_number = 0 - 1;
}
elsif ( $element_name eq 'table:table' ) {
if ( defined $worksheet_count ) {
$worksheet_count += 1;
}
else {
$worksheet_count = 0;
}
$process_worksheet =
$self->_process_worksheet(
$element_parameters{'table:name'},
$worksheet_count );
}
elsif ( $element_name eq 'table:table-cell' ) {
$current_column_number += 1;
}
},
End => sub {
my ( $expat, $element_name ) = @_;
if ( $element_name ne pop @{$element_stack} ) {
Carp::croak(
'Internal confusion in processing XML elements');
}
if ( $element_name eq 'table:table-row' ) {
if ( @{$current_row} ) {
push @{$cells}, $current_row;
}
}
elsif ( $element_name eq 'table:table' ) {
$process_worksheet = 0;
}
},
Char => sub {
my ( $expat, $content ) = @_;
if ($process_worksheet) {
if ( $element_stack->[_PARENT_INDEX] eq 'text:p' ) {
if ( $element_stack->[ _GRANDPARENT_INDEX() ] eq
'table:table-cell' )
{
if (
defined $current_row->[$current_column_number] )
{
$current_row->[$current_column_number] .=
$content;
}
else {
$current_row->[$current_column_number] =
$content;
}
}
}
}
}
}
);
eval { $xml->parse($ods_content); } or do {
chomp $EVAL_ERROR;
$EVAL_ERROR =~ s/^\s*//smx;
$self->{_ERROR_DIAG} = "XML - Invalid XML in content.xml:$EVAL_ERROR";
return;
};
if ( defined $cells ) {
return $cells;
}
else {
$self->{_ERROR_DIAG} = 'ENOENT - Worksheet '
. (
defined $self->{worksheet_name}
? $self->{worksheet_name}
: $self->{worksheet_number}
) . ' not found';
return;
}
}
sub _gnumeric_cells {
my ( $self, $gnumeric_content ) = @_;
my $cells;
my $current_row_number;
my $current_column_number;
my $current_worksheet_name;
my $current_sheet_cells;
my $element_stack;
my $process_worksheet = 0;
my $worksheet_count;
my $xml = XML::Parser->new(
Handlers => {
Entity => sub {
die
"XML Entities have been detected and rejected in the XML, due to security concerns\n";
},
Start => sub {
my ( $expat, $element_name, %element_parameters ) = @_;
push @{$element_stack}, $element_name;
if ( $element_name eq 'gnm:Cell' ) {
$current_row_number = $element_parameters{Row};
$current_column_number = $element_parameters{Col};
}
elsif ( $element_name eq 'gnm:Sheet' ) {
$current_sheet_cells = [];
}
},
End => sub {
my ( $expat, $element_name ) = @_;
if ( $element_name ne pop @{$element_stack} ) {
Carp::croak(
'Internal confusion in processing XML elements');
}
elsif ( $element_name eq 'gnm:Sheet' ) {
if ( defined $worksheet_count ) {
$worksheet_count += 1;
}
else {
$worksheet_count = 0;
}
if (
$self->_process_worksheet(
$current_worksheet_name, $worksheet_count
)
)
{
$cells = $current_sheet_cells;
}
}
},
Char => sub {
my ( $expat, $content ) = @_;
if ( $element_stack->[ _PARENT_INDEX() ] eq 'gnm:Name' ) {
if ( $element_stack->[ _GRANDPARENT_INDEX() ] eq
'gnm:Sheet' )
{
$current_worksheet_name = $content;
}
}
if ( $element_stack->[ _PARENT_INDEX() ] eq 'gnm:Cell' ) {
$current_sheet_cells->[$current_row_number]
->[$current_column_number] = $content;
}
}
}
);
eval { $xml->parse($gnumeric_content); } or do {
chomp $EVAL_ERROR;
$EVAL_ERROR =~ s/^\s*//smx;
$self->{_ERROR_DIAG} =
"XML - Invalid XML in gzipped gnumeric file:$EVAL_ERROR";
return;
};
if ( defined $cells ) {
return $cells;
}
else {
$self->{_ERROR_DIAG} = 'ENOENT - Worksheet '
. (
defined $self->{worksheet_name}
? $self->{worksheet_name}
: $self->{worksheet_number}
) . ' not found';
return;
}
}
sub _xlsx_shared_strings {
my ( $self, $zip ) = @_;
my $member = $zip->memberNamed('xl/sharedStrings.xml');
if ( !$member ) {
$self->{_ERROR_DIAG} = q[ZIP - Missing 'xl/sharedStrings.xml' file];
return;
}
my $shared_string_content = $member->contents();
my $element_stack;
my $current_index;
my $shared_strings = {};
my $xml = XML::Parser->new(
Handlers => {
Entity => sub {
die
"XML Entities have been detected and rejected in the XML, due to security concerns\n";
},
Start => sub {
my ( $expat, $element_name, %element_parameters ) = @_;
push @{$element_stack}, $element_name;
if ( $element_name eq 'sst' ) {
}
elsif ( $element_name eq 'si' ) {
if ( defined $current_index ) {
$current_index += 1;
}
else {
$current_index = 0;
}
}
elsif ( $element_name eq 't' ) {
}
},
End => sub {
my ( $expat, $element_name ) = @_;
if ( $element_name ne pop @{$element_stack} ) {
Carp::croak(
'Internal confusion in processing XML elements');
}
},
Char => sub {
my ( $expat, $content ) = @_;
if ( defined $shared_strings->{$current_index} ) {
$shared_strings->{$current_index} .= $content;
}
else {
$shared_strings->{$current_index} = $content;
}
}
}
);
eval { $xml->parse($shared_string_content); } or do {
chomp $EVAL_ERROR;
$EVAL_ERROR =~ s/^\s*//smx;
$self->{_ERROR_DIAG} =
"XML - Invalid XML in sharedStrings.xml:$EVAL_ERROR";
return;
};
return $shared_strings;
}
sub _xlsx_worksheet_path {
my ( $self, $zip ) = @_;
my $member = $zip->memberNamed('xl/workbook.xml');
if ( !$member ) {
$self->{_ERROR_DIAG} = q[ZIP - Missing 'xl/workbook.xml' file];
return;
}
my $content = $member->contents();
my $worksheet_number;
my $sheets = [];
my $worksheet_count;
my $xml = XML::Parser->new(
Handlers => {
Entity => sub {
die
"XML Entities have been detected and rejected in the XML, due to security concerns\n";
},
Start => sub {
my ( $expat, $element_name, %element_parameters ) = @_;
if ( $element_name eq 'sheet' ) {
push @{$sheets}, \%element_parameters;
if ( !defined $worksheet_number ) {
if ( defined $worksheet_count ) {
$worksheet_count += 1;
}
else {
$worksheet_count = 0;
}
if (
( defined $self->{worksheet_name} )
&& ( defined $element_parameters{name} )
&& ( $self->{worksheet_name} eq
$element_parameters{name} )
)
{
$worksheet_number = $worksheet_count + 1;
}
elsif (
( $self->{worksheet_number} )
&& ( ( $self->{worksheet_number} - 1 ) ==
$worksheet_count )
)
{
$worksheet_number = $worksheet_count + 1;
}
}
}
}
}
);
eval { $xml->parse($content); } or do {
chomp $EVAL_ERROR;
$EVAL_ERROR =~ s/^\s*//smx;
$self->{_ERROR_DIAG} =
"XML - Invalid XML in xl/workbook.xml:$EVAL_ERROR";
return;
};
if ( !defined $worksheet_number ) {
$self->{_ERROR_DIAG} = 'ENOENT - Worksheet '
. (
defined $self->{worksheet_name}
? $self->{worksheet_name}
: $self->{worksheet_number}
) . ' not found';
return;
}
return 'xl/worksheets/sheet' . $worksheet_number . '.xml';
}
sub _xlsx_cells {
my ( $self, $xlsx_content, $shared_strings, $worksheet_path ) = @_;
my $cells = [];
my $current_worksheet_name;
my $current_sheet_cells = [];
my $parameter_stack;
my $element_stack;
my $process_worksheet = 0;
my $xml = XML::Parser->new(
Handlers => {
Entity => sub {
die
"XML Entities have been detected and rejected in the XML, due to security concerns\n";
},
Start => sub {
my ( $expat, $element_name, %element_parameters ) = @_;
push @{$element_stack}, $element_name;
push @{$parameter_stack}, \%element_parameters;
},
End => sub {
my ( $expat, $element_name ) = @_;
pop @{$parameter_stack};
if ( $element_name ne pop @{$element_stack} ) {
Carp::croak(
'Internal confusion in processing XML elements');
}
},
Char => sub {
my ( $expat, $content ) = @_;
if ( ( $element_stack->[ _GRANDPARENT_INDEX() ] eq 'c' )
&& ( $element_stack->[ _PARENT_INDEX() ] eq 'v' ) )
{
my $cell_reference =
$parameter_stack->[ _GRANDPARENT_INDEX() ]->{r};
if ( $cell_reference =~ /^([[:upper:]]+)(\d+)$/smx ) {
my ( $column_designator, $row_number ) = ( $1, $2 - 1 );
my $column_number = 0;
my $letter_index = 0;
foreach
my $letter ( reverse split //smx, $column_designator )
{
$column_number +=
( ( ord uc $letter ) - ( ord 'A' ) ) *
( _EXCEL_COLUMN_RADIX()**$letter_index );
$letter_index += 1;
}
if (
(
defined
$parameter_stack->[ _GRANDPARENT_INDEX() ]->{t}
)
&& ( $parameter_stack->[ _GRANDPARENT_INDEX() ]->{t}
eq 's' )
)
{
$cells->[$row_number]->[$column_number] =
$shared_strings->{$content};
}
else {
$cells->[$row_number]->[$column_number] = $content;
}
}
else {
Carp::croak('Unable to determine cell reference');
}
}
}
}
);
eval { $xml->parse($xlsx_content); } or do {
chomp $EVAL_ERROR;
$self->{_ERROR_DIAG} =
"XML - Invalid XML in $worksheet_path:$EVAL_ERROR";
return;
};
return $cells;
}
sub content_type {
my ($self) = @_;
return $self->{content_type};
}
sub suffix {
my ($self) = @_;
return $self->{type};
}
1;
__END__
=head1 NAME
Spreadsheet::CSV - Drop-in replacement for Text::CSV_XS with spreadsheet support
=head1 VERSION
Version 0.13
=head1 SYNOPSIS
use Spreadsheet::CSV();
use CGI();
my $cgi = CGI->new();
my $handle = $cgi->upload('UploadFile');
my @rows;
my $csv = Spreadsheet::CSV->new();
while (my $row = $csv->getline ($handle)) {
$row->[2] =~ m/pattern/ or next; # 3rd field should match
push @rows, $row;
}
$csv->eof() or die $csv->error_diag();
close $handle or die "Screaming:$!";
=head1 DESCRIPTION
Spreadsheet::CSV attempts to provide a drop-in replacement for Text::CSV_XS when reading in user-provided input files, such as via Email or Web. This is currently only for reading documents and only via the $csv->getline interface documented above
=head1 SUBROUTINES/METHODS
=head2 new
(Class method) Returns a new instance of Spreadsheet::CSV. It accepts all the parameters from Text::CSV_XS, as well as two additional ones.
my $csv = Text::CSV_XS->new ({ attributes ... });
The Text::CSV_XS 'binary' parameter is switched on by default.
The following additional attributes are available:
=over 4
=item worksheet_name
X<worksheet_name>
The worksheet to read from for file handles containing spreadsheets.
=item worksheet_number
X<worksheet_number>
The worksheet to read from for file handles containing spreadsheets (starting from
1). This option will only apply if the worksheet name has not been specified. By
default worksheet_number will be '1'
=back
=head2 getline
X<getline>
$colref = $csv->getline ($io);
It reads a row from the IO object and parses this row into an array ref. This
array ref is returned by the function or undef for failure. If the IO object
points to a known spreadsheet file type, the first call to L</getline> will
convert the entire file into an in-memory list-of-lists before returning the first
row.
Accepted file types are currently
* Microsoft Excel 97 - .xls
* Microsoft Excel 2003 - .xlsx
* OpenOffice - .ods and .sxc
* Gnumeric - .gnumeric
* Kspread - .ksp
* CSV - .csv
Patches for support for other file types would be gratefully accepted.
=head2 eof
X<eof>
$eof = $csv->eof ();
This method will return true (1) if the last call hit end of file, otherwise
it will return false (''). This is useful to see the difference between a
failure and end of file
=head2 error_diag
$error_str = $csv->error_diag ();
If (and only if) an error occurred, this function returns the diagnostics of that error.
=head2 content_type
$content_type = $csv->content_type ();
After the $handle has been passed to getline, the content_type can be queried. If
$handle points to a recognised spreadsheet type, the appropriate content type will be
returned (such as 'application/vnd.ms-excel', otherwise, undef will be returned
=head2 suffix
$suffix = $csv->suffix ();
After the $handle has been passed to getline, the file suffix can be queried. If
$handle points to a recognised spreadsheet type, the suffix will be returned (such as
'xls'), otherwise, undef will be returned
=head1 DIAGNOSTICS
If an error occurred, L</error_diag> can be used to get more
information on the cause of the failure.
=head1 CONFIGURATION AND ENVIRONMENT
Spreadsheet::CSV requires no configuration files or environment variables.
=head1 DEPENDENCIES
Spreadsheet::CSV requires the following non-core modules
Spreadsheet::ParseExcel
Text::CSV_XS
Archive::Zip
XML::Parser
Compress::Zlib
and the following core modules
Fcntl
English
Carp
IO::File
=head1 AUTHOR
David Dick, C<< <ddick at cpan.org> >>
=head1 INCOMPATIBILITIES
The only spreadsheets supported at the moment are
Microsoft Excel 97
Microsoft Excel 2003
OpenOffice
Gnumeric
KSpread
CSV
=head1 BUGS AND LIMITATIONS
At the moment this library will read everything into RAM. It relies on the system enforcing the reasonable limits
for file size, to allow these files to be read into RAM. This may change in the future.
Spreadsheet::ParseExcel can lose precision when extracing floating point numbers
Please report any bugs or feature requests to C<bug-spreadsheet-csv at rt.cpan.org>, or through
the web interface at L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Spreadsheet-CSV>. I will be notified, and then you'll
automatically be notified of progress on your bug as I make changes.
=head1 SUPPORT
You can look for information at:
=over 4
=item * CPAN Ratings
=item * Search CPAN
=item * Request Tracker
=back
=head1 LICENSE AND COPYRIGHT
Copyright 2013 David Dick.
This program is free software; you can redistribute it and/or modify it
under the terms of either: the GNU General Public License as published
by the Free Software Foundation; or the Artistic License.
See http://dev.perl.org/licenses/ for more information.