use
5.12.1;
our
$VERSION
=
'1.15'
;
sub
_strings {
my
$self
=
shift
;
my
$reader
=
$self
->_xml_reader_for_zip_member(
'xl/sharedStrings.xml'
);
my
@strings
;
my
$last_string
;
NODE:
while
(
$reader
->
read
) {
next
NODE
if
$reader
->nodeType == XML_READER_TYPE_END_ELEMENT;
my
$node_name
=
$reader
->name;
if
(
$node_name
eq
'si'
) {
push
@strings
,
$last_string
if
defined
$last_string
;
$last_string
=
''
;
}
elsif
(
$node_name
eq
'#text'
) {
$last_string
.=
$reader
->value;
}
}
push
@strings
,
$last_string
if
defined
$last_string
;
return
\
@strings
;
}
sub
_workbook_data {
my
$self
=
shift
;
my
%workbook_data
= (
sheets
=> {},
base_year
=> 1900);
my
$sheet_id
= 1;
my
$reader
=
$self
->_xml_reader_for_zip_member(
'xl/workbook.xml'
);
NODE:
while
(
$reader
->
read
) {
next
NODE
if
$reader
->nodeType == XML_READER_TYPE_END_ELEMENT;
if
(
$reader
->name eq
'sheet'
) {
my
$name
=
$reader
->getAttribute(
'name'
)
or
die
"sheet node without name"
;
$workbook_data
{sheets}{
$name
} =
$sheet_id
++;
}
elsif
(
$reader
->name eq
'workbookPr'
and
my
$date_attr
=
$reader
->getAttribute(
'date1904'
)) {
$workbook_data
{base_year} = 1904
if
$date_attr
eq
'1'
or
$date_attr
eq
'true'
;
}
elsif
(
$reader
->name eq
'workbookView'
and
my
$active_attr
=
$reader
->getAttribute(
'activeTab'
)) {
$workbook_data
{active_sheet} =
$active_attr
+ 1
if
defined
$active_attr
;
}
}
return
\
%workbook_data
;
}
sub
_date_styles {
my
$self
=
shift
;
state
$date_style_regex
=
qr{[dy]|\bmm\b}
;
my
@date_styles
;
my
$xml_reader
=
$self
->_xml_reader_for_zip_member(
'xl/styles.xml'
);
my
@numFmt
=
$self
->Excel_builtin_date_formats;
my
$expected_subnode
=
undef
;
NODE:
while
(
$xml_reader
->
read
) {
next
NODE
if
$xml_reader
->nodeType == XML_READER_TYPE_END_ELEMENT;
if
(
$expected_subnode
) {
my
(
$name
,
$depth
,
$handler
) =
@$expected_subnode
;
if
(
$xml_reader
->name eq
$name
&&
$xml_reader
->depth ==
$depth
) {
$handler
->();
next
NODE;
}
elsif
(
$xml_reader
->depth <
$depth
) {
$expected_subnode
=
undef
;
}
}
if
(
$xml_reader
->name eq
'numFmts'
) {
$expected_subnode
= [
numFmt
=>
$xml_reader
->depth+
1
=>
sub
{
my
$id
=
$xml_reader
->getAttribute(
'numFmtId'
);
my
$code
=
$xml_reader
->getAttribute(
'formatCode'
);
$numFmt
[
$id
] =
$code
if
$id
&&
$code
&&
$code
=~
$date_style_regex
;
}];
}
elsif
(
$xml_reader
->name eq
'cellXfs'
) {
$expected_subnode
= [
xf
=>
$xml_reader
->depth+
1
=>
sub
{
state
$xf_count
= 0;
my
$numFmtId
=
$xml_reader
->getAttribute(
'numFmtId'
);
my
$code
=
$numFmt
[
$numFmtId
];
$date_styles
[
$xf_count
++] =
$code
;
}];
}
}
return
\
@date_styles
;
}
sub
_xml_reader {
my
(
$self
,
$xml
) =
@_
;
my
$reader
= XML::LibXML::Reader->new(
string
=>
$xml
,
no_blanks
=> 1,
no_network
=> 1,
huge
=> 1);
return
$reader
;
}
sub
_xml_reader_for_zip_member {
my
(
$self
,
$member_name
) =
@_
;
my
$contents
=
$self
->_zip_member_contents(
$member_name
);
return
$self
->_xml_reader(
$contents
);
}
sub
values
{
my
(
$self
,
$sheet
) =
@_
;
my
$has_date_formatter
=
$self
->frontend->date_formatter;
my
$sheet_member_name
=
$self
->_zip_member_name_for_sheet(
$sheet
);
my
$xml_reader
=
$self
->_xml_reader_for_zip_member(
$sheet_member_name
);
my
@data
;
my
(
$row
,
$col
) = (0, 0);
my
(
$cell_type
,
$cell_style
,
$seen_node
);
NODE:
while
(
$xml_reader
->
read
) {
my
$node_name
=
$xml_reader
->name;
my
$node_type
=
$xml_reader
->nodeType;
last
NODE
if
$node_name
eq
'sheetData'
&&
$node_type
== XML_READER_TYPE_END_ELEMENT;
next
NODE
if
$node_type
== XML_READER_TYPE_END_ELEMENT;
if
(
$node_name
eq
'row'
) {
my
$row_num
=
$xml_reader
->getAttribute(
'r'
);
$row
=
$row_num
//
$row
+ 1;
$col
= 0;
}
if
(
$node_name
eq
'c'
) {
my
$A1_cell_ref
=
$xml_reader
->getAttribute(
'r'
);
(
$col
,
$row
) =
$A1_cell_ref
?
do
{
my
(
$c
,
$r
) = (
$A1_cell_ref
=~ /^([A-Z]+)(\d+)$/);
(
$self
->A1_to_num(
$c
),
$r
)}
: (
$col
+1,
$row
);
$cell_type
=
$xml_reader
->getAttribute(
't'
);
$cell_style
=
$xml_reader
->getAttribute(
's'
);
$seen_node
=
''
;
}
elsif
(
$node_name
=~ /^[vtf]$/) {
$seen_node
=
$node_name
;
}
elsif
(
$node_name
eq
'#text'
) {
my
$val
=
$xml_reader
->value;
$cell_type
//=
''
;
if
(
$seen_node
eq
'v'
) {
if
(
$cell_type
eq
's'
) {
if
(looks_like_number(
$val
)) {
$val
=
$self
->strings->[
$val
];
}
else
{
warn
"unexpected non-numerical value: $val inside a node of shape <v t='s'>\n"
;
}
}
elsif
(
$cell_type
eq
'e'
) {
$val
=
undef
;
}
elsif
(
$cell_type
=~ /^(n|d|b|str|)$/) {
if
(
$has_date_formatter
&&
$cell_style
&& looks_like_number(
$val
) &&
$val
>= 0) {
my
$date_style
=
$self
->date_styles->[
$cell_style
];
$val
=
$self
->formatted_date(
$val
,
$date_style
)
if
$date_style
;
}
}
else
{
warn
"unsupported type '$cell_type' in cell L${row}C${col}\n"
;
$val
=
undef
;
}
$data
[
$row
-1][
$col
-1] =
$val
;
}
elsif
(
$seen_node
eq
't'
&&
$cell_type
eq
'inlineStr'
) {
no
warnings
'uninitialized'
;
$data
[
$row
-1][
$col
-1] .=
$val
;
}
elsif
(
$seen_node
eq
'f'
) {
}
else
{
warn
"unexpected text node in cell L${row}C${col}: $val\n"
;
}
}
}
$_
//= []
foreach
@data
;
return
\
@data
;
}
sub
_table_targets {
my
(
$self
,
$rel_xml
) =
@_
;
my
$xml_reader
=
$self
->_xml_reader(
$rel_xml
);
my
@table_targets
;
NODE:
while
(
$xml_reader
->
read
) {
my
$node_name
=
$xml_reader
->name;
my
$node_type
=
$xml_reader
->nodeType;
next
NODE
if
$node_type
== XML_READER_TYPE_END_ELEMENT;
if
(
$node_name
eq
'Relationship'
) {
my
$target
=
$xml_reader
->getAttribute(
'Target'
);
if
(
$target
=~ m[tables/table(\d+)\.xml]) {
push
@table_targets
, $1;
}
}
}
return
@table_targets
;
}
sub
_parse_table_xml {
my
(
$self
,
$xml
) =
@_
;
my
(
$name
,
$ref
,
$no_headers
,
@columns
);
my
$xml_reader
=
$self
->_xml_reader(
$xml
);
NODE:
while
(
$xml_reader
->
read
) {
my
$node_name
=
$xml_reader
->name;
my
$node_type
=
$xml_reader
->nodeType;
next
NODE
if
$node_type
== XML_READER_TYPE_END_ELEMENT;
if
(
$node_name
eq
'table'
) {
$name
=
$xml_reader
->getAttribute(
'displayName'
);
$ref
=
$xml_reader
->getAttribute(
'ref'
);
$no_headers
= (
$xml_reader
->getAttribute(
'headerRowCount'
) //
""
) eq
"0"
;
}
elsif
(
$node_name
eq
'tableColumn'
) {
push
@columns
,
$xml_reader
->getAttribute(
'name'
);
}
}
return
(
$name
,
$ref
, \
@columns
,
$no_headers
);
}
1;