Tag::Reader::Perl - Parse SGML/HTML/XML by each "tag".
use Tag::Reader::Perl; my $obj = Tag::Reader::Perl->new; my @tokens = $obj->gettoken; $obj->set_file($file, $force); $obj->set_text($text, $force);
new()
my $obj = Tag::Reader::Perl->new;
Constructor.
Returns instance of object.
gettoken
my @tokens = $obj->gettoken;
Get parsed token.
In list context, returns the structure defining the parsed token (see TOKEN STRUCTURE), e.g. <xml> → ('<xml>', 'xml', 1, 1).
In scalar context, returns the parsed token, e.g. <xml> → '<xml>'.
set_file
$obj->set_file($file, $force);
Set file for parsing. If $force is present, reset the file for parsing when a previous text or file already exists.
Returns undef.
set_text
$obj->set_text($text, $force);
Set text for parsing. If $force is present, reset the text for parsing when a previous text or file already exists.
Structure contains 4 fields in array: - parsed data - tag type - number of line - number of column in line Tag types are: - '[\w:]+' - element name. - '/[\w:]+' - end of element name. - '!data' - data - '![cdata[' - cdata - '!--' - comment - '?\w+' - instruction - '![\w+' - conditional - '!attlist' - DTD attlist - '!element' - DTD element - '!entity' - DTD entity - '!notation' - DTD notation
new(): From Class::Utils::set_params(): Unknown parameter '%s'. set_text(): Bad tag. Bad text. Cannot set new data if exists data. set_file(): Bad tag. Bad file. Cannot set new data if exists data. Cannot open file '%s'.
use strict; use warnings; use Tag::Reader::Perl; use Unicode::UTF8 qw(decode_utf8 encode_utf8); # Object. my $obj = Tag::Reader::Perl->new; # Example data. my $sgml = <<'END'; <DOKUMENT> <adresa stát="cs"> <město> <ulice>Nová</ulice> <číslo>5</číslo> </adresa> </DOKUMENT> END # Set data to object. $obj->set_text(decode_utf8($sgml)); # Tokenize. while (my @tag = $obj->gettoken) { print "[\n"; print "\t[0]: '".encode_utf8($tag[0])."'\n"; print "\t[1]: ".encode_utf8($tag[1])."\n"; print "\t[2]: $tag[2]\n"; print "\t[3]: $tag[3]\n"; print "]\n"; } # Output: # [ # [0]: '<DOKUMENT>' # [1]: dokument # [2]: 1 # [3]: 1 # ] # [ # [0]: ' # ' # [1]: !data # [2]: 1 # [3]: 11 # ] # [ # [0]: '<adresa stát="cs">' # [1]: adresa # [2]: 2 # [3]: 3 # ] # [ # [0]: ' # ' # [1]: !data # [2]: 2 # [3]: 21 # ] # [ # [0]: '<město>' # [1]: město # [2]: 3 # [3]: 5 # ] # [ # [0]: ' # ' # [1]: !data # [2]: 3 # [3]: 12 # ] # [ # [0]: '<ulice>' # [1]: ulice # [2]: 4 # [3]: 5 # ] # [ # [0]: 'Nová' # [1]: !data # [2]: 4 # [3]: 12 # ] # [ # [0]: '</ulice>' # [1]: /ulice # [2]: 4 # [3]: 16 # ] # [ # [0]: ' # ' # [1]: !data # [2]: 4 # [3]: 24 # ] # [ # [0]: '<číslo>' # [1]: číslo # [2]: 5 # [3]: 5 # ] # [ # [0]: '5' # [1]: !data # [2]: 5 # [3]: 12 # ] # [ # [0]: '</číslo>' # [1]: /číslo # [2]: 5 # [3]: 13 # ] # [ # [0]: ' # ' # [1]: !data # [2]: 5 # [3]: 21 # ] # [ # [0]: '</adresa>' # [1]: /adresa # [2]: 6 # [3]: 3 # ] # [ # [0]: ' # ' # [1]: !data # [2]: 6 # [3]: 12 # ] # [ # [0]: '</DOKUMENT>' # [1]: /dokument # [2]: 7 # [3]: 1 # ] # [ # [0]: ' # ' # [1]: !data # [2]: 7 # [3]: 12 # ]
Class::Utils, Error::Pure, Readonly.
Parse SGML/HTML/XML by each "tag".
Perl extension module for reading html/sgml/xml files by tags.
https://github.com/michal-josef-spacek/Tag-Reader-Perl
Michal Josef Špaček mailto:skim@cpan.org
http://skim.cz
© Michal Josef Špaček 2005-2021
BSD 2-Clause License
0.02
To install Tag::Reader::Perl, copy and paste the appropriate command into your terminal.
cpanm
cpanm Tag::Reader::Perl
CPAN shell
perl -MCPAN -e shell install Tag::Reader::Perl
For more information on module installation, please visit the detailed CPAN module installation guide.