The Perl Toolchain Summit 2025 Needs You: You can help 🙏 Learn more

NAME

Tag::Reader::Perl - Parse SGML/HTML/XML by each "tag".

SYNOPSIS

my $obj = Tag::Reader::Perl->new;
my @tokens = $obj->gettoken;
$obj->set_file($file, $force);
$obj->set_text($text, $force);

METHODS

new()

my $obj = Tag::Reader::Perl->new;

Constructor.

Returns instance of object.

gettoken

my @tokens = $obj->gettoken;

Get parsed token.

In list context, returns the structure describing the parsed token (see "TOKEN STRUCTURE"), e.g. <xml> → ('<xml>', 'xml', 1, 1)

In scalar context, returns only the parsed token data, e.g. <xml> → '<xml>'

set_file

$obj->set_file($file, $force);

Set file for parsing. If $force is present, the file is set even when a previous text or file already exists; otherwise setting new data over existing data is an error.

Returns undef.

set_text

$obj->set_text($text, $force);

Set text for parsing. If $force is present, the text is set even when a previous text or file already exists; otherwise setting new data over existing data is an error.

Returns undef.

TOKEN STRUCTURE

Structure contains 4 fields in array:
- parsed data
- tag type
- number of line
- number of column in line
Tag types are:
- '[\w:]+' - element name.
- '/[\w:]+' - end of element name.
- '!data' - data
- '![cdata[' - cdata
- '!--' - comment
- '?\w+' - instruction
- '![\w+' - conditional
- '!attlist' - DTD attlist
- '!element' - DTD element
- '!entity' - DTD entity
- '!notation' - DTD notation

ERRORS

new():
From Class::Utils::set_params():
Unknown parameter '%s'.
set_text():
Bad tag.
Bad text.
Cannot set new data if exists data.
set_file():
Bad tag.
Bad file.
Cannot set new data if exists data.
Cannot open file '%s'.

EXAMPLE1

use strict;
use warnings;

use Unicode::UTF8 qw(decode_utf8 encode_utf8);
use Tag::Reader::Perl;

# Tokenize a small SGML document with Tag::Reader::Perl and print every
# token as a four-element record: parsed data, tag type, line and column.
#
# NOTE: 'use utf8' is intentionally not enabled here. The heredoc below is
# decoded explicitly with decode_utf8(), so it must stay as raw UTF-8 bytes.

# Object.
my $obj = Tag::Reader::Perl->new;

# Example data.
# The leading whitespace is significant: the line/column numbers shown in the
# expected output are computed from it (e.g. '<adresa ...>' starts at line 2,
# column 3 and '<ulice>' at line 4, column 5).
my $sgml = <<'END';
<DOKUMENT>
  <adresa stát="cs">
    <město>
    <ulice>Nová</ulice>
    <číslo>5</číslo>
  </adresa>
</DOKUMENT>
END

# Set data to object.
$obj->set_text(decode_utf8($sgml));

# Tokenize.
# gettoken() is called in list context; the loop stops as soon as it returns
# no elements. Textual fields are encoded back to UTF-8 bytes for printing.
while (my @tag = $obj->gettoken) {
    print "[\n";
    print "\t[0]: '".encode_utf8($tag[0])."'\n";
    print "\t[1]: ".encode_utf8($tag[1])."\n";
    print "\t[2]: $tag[2]\n";
    print "\t[3]: $tag[3]\n";
    print "]\n";
}
# Output:
# [
# [0]: '<DOKUMENT>'
# [1]: dokument
# [2]: 1
# [3]: 1
# ]
# [
# [0]: '
# '
# [1]: !data
# [2]: 1
# [3]: 11
# ]
# [
# [0]: '<adresa stát="cs">'
# [1]: adresa
# [2]: 2
# [3]: 3
# ]
# [
# [0]: '
# '
# [1]: !data
# [2]: 2
# [3]: 21
# ]
# [
# [0]: '<město>'
# [1]: město
# [2]: 3
# [3]: 5
# ]
# [
# [0]: '
# '
# [1]: !data
# [2]: 3
# [3]: 12
# ]
# [
# [0]: '<ulice>'
# [1]: ulice
# [2]: 4
# [3]: 5
# ]
# [
# [0]: 'Nová'
# [1]: !data
# [2]: 4
# [3]: 12
# ]
# [
# [0]: '</ulice>'
# [1]: /ulice
# [2]: 4
# [3]: 16
# ]
# [
# [0]: '
# '
# [1]: !data
# [2]: 4
# [3]: 24
# ]
# [
# [0]: '<číslo>'
# [1]: číslo
# [2]: 5
# [3]: 5
# ]
# [
# [0]: '5'
# [1]: !data
# [2]: 5
# [3]: 12
# ]
# [
# [0]: '</číslo>'
# [1]: /číslo
# [2]: 5
# [3]: 13
# ]
# [
# [0]: '
# '
# [1]: !data
# [2]: 5
# [3]: 21
# ]
# [
# [0]: '</adresa>'
# [1]: /adresa
# [2]: 6
# [3]: 3
# ]
# [
# [0]: '
# '
# [1]: !data
# [2]: 6
# [3]: 12
# ]
# [
# [0]: '</DOKUMENT>'
# [1]: /dokument
# [2]: 7
# [3]: 1
# ]
# [
# [0]: '
# '
# [1]: !data
# [2]: 7
# [3]: 12
# ]

DEPENDENCIES

Class::Utils, Error::Pure, Readonly.

SEE ALSO

Tag::Reader

Parse SGML/HTML/XML by each "tag".

HTML::TagReader

Perl extension module for reading html/sgml/xml files by tags.

REPOSITORY

https://github.com/michal-josef-spacek/Tag-Reader-Perl

AUTHOR

Michal Josef Špaček mailto:skim@cpan.org

http://skim.cz

LICENSE AND COPYRIGHT

© Michal Josef Špaček 2005-2021

BSD 2-Clause License

VERSION

0.02