NAME
Tag::Reader::Perl - Parse SGML/HTML/XML by each "tag".
SYNOPSIS
use Tag::Reader::Perl;
my $obj = Tag::Reader::Perl->new;
my @tokens = $obj->gettoken;
$obj->set_file($file, $force);
$obj->set_text($text, $force);
METHODS
new()
my $obj = Tag::Reader::Perl->new;
Constructor.
Returns instance of object.
gettoken
my
@tokens
=
$obj
->gettoken;
Get parsed token.
In list context, returns the structure describing the parsed token (see TOKEN STRUCTURE), e.g. <xml> → ('<xml>', 'xml', 1, 1).
In scalar context, returns only the parsed token, e.g. <xml> → '<xml>'.
set_file
$obj
->set_file(
$file
,
$force
);
Set file for parsing. If $force is present, reset the file for parsing even if a previous text or file exists.
Returns undef.
set_text
$obj
->set_text(
$text
,
$force
);
Set text for parsing. If $force is present, reset the text for parsing even if a previous text or file exists.
Returns undef.
TOKEN STRUCTURE
Structure contains 4 fields in array:
- parsed data
- tag type
- number of line
- number of column in line
Tag types are:
-
'[\w:]+'
- element name.
-
'/[\w:]+'
- end of element name.
-
'!data'
- data
-
'![cdata['
- cdata
-
'!--'
- comment
-
'?\w+'
- instruction
-
'![\w+'
- conditional
-
'!attlist'
- DTD attlist
-
'!element'
- DTD element
-
'!entity'
- DTD entity
-
'!notation'
- DTD notation
ERRORS
new():
From Class::Utils::set_params():
Unknown parameter
'%s'
.
set_text():
Bad tag.
Bad text.
Cannot set new data
if
exists
data.
set_file():
Bad tag.
Bad file.
Cannot set new data
if
exists
data.
Cannot
open
file
'%s'
.
EXAMPLE1
use strict;
use warnings;
use Encode qw(decode_utf8 encode_utf8);
use Tag::Reader::Perl;
# Object.
my
$obj
= Tag::Reader::Perl->new;
# Example data.
my
$sgml
=
<<'END';
<DOKUMENT>
  <adresa stát="cs">
    <město>
    <ulice>Nová</ulice>
    <číslo>5</číslo>
  </adresa>
</DOKUMENT>
END
# Set data to object.
$obj
->set_text(decode_utf8(
$sgml
));
# Tokenize.
while (my @tag = $obj->gettoken) {
        print "[\n";
        print "\t[0]: '".encode_utf8($tag[0])."'\n";
        print "\t[1]: ".encode_utf8($tag[1])."\n";
        print "\t[2]: $tag[2]\n";
        print "\t[3]: $tag[3]\n";
        print "]\n";
}
# Output:
# [
# [0]: '<DOKUMENT>'
# [1]: dokument
# [2]: 1
# [3]: 1
# ]
# [
# [0]: '
# '
# [1]: !data
# [2]: 1
# [3]: 11
# ]
# [
# [0]: '<adresa stát="cs">'
# [1]: adresa
# [2]: 2
# [3]: 3
# ]
# [
# [0]: '
# '
# [1]: !data
# [2]: 2
# [3]: 21
# ]
# [
# [0]: '<město>'
# [1]: město
# [2]: 3
# [3]: 5
# ]
# [
# [0]: '
# '
# [1]: !data
# [2]: 3
# [3]: 12
# ]
# [
# [0]: '<ulice>'
# [1]: ulice
# [2]: 4
# [3]: 5
# ]
# [
# [0]: 'Nová'
# [1]: !data
# [2]: 4
# [3]: 12
# ]
# [
# [0]: '</ulice>'
# [1]: /ulice
# [2]: 4
# [3]: 16
# ]
# [
# [0]: '
# '
# [1]: !data
# [2]: 4
# [3]: 24
# ]
# [
# [0]: '<číslo>'
# [1]: číslo
# [2]: 5
# [3]: 5
# ]
# [
# [0]: '5'
# [1]: !data
# [2]: 5
# [3]: 12
# ]
# [
# [0]: '</číslo>'
# [1]: /číslo
# [2]: 5
# [3]: 13
# ]
# [
# [0]: '
# '
# [1]: !data
# [2]: 5
# [3]: 21
# ]
# [
# [0]: '</adresa>'
# [1]: /adresa
# [2]: 6
# [3]: 3
# ]
# [
# [0]: '
# '
# [1]: !data
# [2]: 6
# [3]: 12
# ]
# [
# [0]: '</DOKUMENT>'
# [1]: /dokument
# [2]: 7
# [3]: 1
# ]
# [
# [0]: '
# '
# [1]: !data
# [2]: 7
# [3]: 12
# ]
DEPENDENCIES
Class::Utils, Error::Pure, Readonly.
SEE ALSO
- Tag::Reader
-
Parse SGML/HTML/XML by each "tag".
- HTML::TagReader
-
Perl extension module for reading html/sgml/xml files by tags.
REPOSITORY
https://github.com/michal-josef-spacek/Tag-Reader-Perl
AUTHOR
Michal Josef Špaček <skim@cpan.org>
LICENSE AND COPYRIGHT
© Michal Josef Špaček 2005-2021
BSD 2-Clause License
VERSION
0.02