The Perl Toolchain Summit 2025 Needs You: You can help 🙏 Learn more

use strict;
use Carp;
=head1 NAME
Mail::Alias::Reader::Token
=head1 DESCRIPTION
Mail::Alias::Reader::Token is not only the class represents an aliases(5) parser
token, but also itself is returned by L<Mail::Alias::Reader> as a representation
of a mail alias destination. For the purposes of this documentation, only the
public-facing methods which facilitate the usage of instances of this class
shall be discussed.
=cut
my @TOKEN_TYPES = (
[ 'T_COMMENT' => qr/#\s*(.*)$/ ],
[ 'T_STRING' => qr/("(?:\\.|[^"\\]+)*")/ ],
[ 'T_COMMA' => qr/,/ ],
[ 'T_DIRECTIVE' => qr/:([^\:\s]+):([^\:\s,]+)/ ],
[ 'T_COMMAND' => qr/\|(\S+)/ ],
[ 'T_ADDRESS' => qr/([a-z0-9_\-@\.*]+)/i ],
[ 'T_COLON' => qr/\:/ ],
[ 'T_FILE' => qr/(\S+)/ ],
[ 'T_WHITESPACE' => qr/\s+/ ],
);
my @TOKEN_STRING_TYPES = (
[ 'T_DIRECTIVE' => qr/:([^\:\s]+):\s*(.*)/s ],
[ 'T_COMMAND' => qr/\|(.*)/s ],
[ 'T_ADDRESS' => qr/([^\/]+)/s ],
[ 'T_FILE' => qr/(.*)/s ]
);
#
# Mail::Alias::Reader::Token->new($type)
#
# Create a new mail alias parser token of the given type. This method isn't
# actually meant to be called publically; rather, it is simply a shortcut to
# create symbolic parser tokens that contain no data, but refer to a piece of
# punctuation, or similar.
#
sub new {
my ( $class, $type ) = @_;
return bless { 'type' => $type }, $class;
}
#
# $token->isa(@types)
#
# Return true if the current token is of any of the types passed as an
# argument.
#
sub isa {
my ( $self, @types ) = @_;
foreach my $type (@types) {
return 1 if $self->{'type'} eq $type;
}
return 0;
}
#
# $token->is_punct()
#
# Returns true if the current token represents a piece of punctuation, or
# something that separates values, clauses, or declarations from one another.
#
sub is_punct {
return shift->isa(qw/T_BEGIN T_END T_COLON T_COMMA/);
}
#
# $token->is_value()
#
# Returns true if the current token represents a meaningful value recorded in
# text, such as a mail transfer agent directive, a command to pass message to,
# a local or remote mailing address, or a file to append messages to.
#
sub is_value {
return shift->isa(qw/T_DIRECTIVE T_COMMAND T_ADDRESS T_FILE/);
}
=head1 DETERMINING MAIL DESTINATION TYPE
A variety of methods are provided to allow one to infer the type of a mail
alias (parser token) returned.
=over
=item $destination->is_address()
Returns true if the mail destination described by the current token is a local
part or fully qualified mail address.
=cut
sub is_address {
return shift->isa('T_ADDRESS');
}
=item $destination->is_directive()
Returns true if the mail destination described by the current token is a
mail transfer agent directive.
=cut
sub is_directive {
return shift->isa('T_DIRECTIVE');
}
=item $destination->is_command()
Returns true if the mail destination described by the current token is a
command to which mail messages should be piped.
=cut
sub is_command {
return shift->isa('T_COMMAND');
}
=item $destination->is_file()
Returns true if the mail destination described by the current token is a file
to which mail messages should be appended.
=back
=cut
sub is_file {
return shift->isa('T_FILE');
}
=head1 CONVERTING THE MAIL DESTINATION TO A STRING
=over
=item $destination->value()
Returns a parsed and unescaped logical representation of the mail alias
destination that was originally parsed to yield the current token object.
=cut
sub value {
return shift->{'value'};
}
=item $destination->to_string()
Returns a string representation of the mail alias destination that was
originally parsed to yield the current token object.
=back
=cut
sub to_string {
my ($self) = @_;
my %SUBSTITUTIONS = (
"\r" => '\r',
"\n" => '\n',
"\t" => '\t',
'"' => '\"'
);
my $value = $self->{'value'};
if ($value) {
foreach my $search (keys %SUBSTITUTIONS) {
$value =~ s/$search/$SUBSTITUTIONS{$search}/g;
}
}
#
# Since not every token type has a "value", per se, lazy evaluation is
# necessary to prevent a Perl runtime warning when evaluating the 'T_COMMENT'
# part of this hash when dealing with tokens that are anything other than a
# comment.
#
my %FORMATTERS = (
'T_COMMENT' => sub { "# $value" },
'T_COMMA' => sub { ',' },
'T_COLON' => sub { ':' },
'T_WHITESPACE' => sub { ' ' }
);
return $FORMATTERS{ $self->{'type'} }->() if exists $FORMATTERS{ $self->{'type'} };
my $ret;
if ( $self->is_directive ) {
$ret = ":$self->{'name'}:$value";
}
elsif ( $self->is_command ) {
$ret = "|$value";
}
else {
$ret = $value;
}
#
# If the data to be returned contains spaces, then wrap it with double quotes
# before returning it to the user.
#
$ret =~ s/^(.*)$/"$1"/ if $ret =~ /\s/;
return $ret;
}
#
# Mail::Alias::Reader::Token->tokenize_for_types($buf, @types)
#
# Transform the given text buffer, $buf, into a series of tokens, based on the
# rules passed in @types (defined near the top of this file). Returns an ARRAY
# of tokens that were matched based on the rules in @types versus the input
# buffer.
#
# As the token types are associated with their parsing rules, and are given in
# an ordered manner, proper precedence can be followed and ambiguity in lexing
# can be overcome.
#
# This method does not provide the main tokenizing interface; rather, it only
# facilitates for the easy access of a single pass of tokenizing, and is called
# by the Mail::Alias::Reader::Token->tokenize() method.
#
sub tokenize_for_types {
my ( $class, $buf, @types ) = @_;
my @tokens;
match: while ($buf) {
foreach my $type (@types) {
next unless $buf =~ s/^$type->[1]//;
my $token = bless {
'type' => $type->[0],
}, $class;
if ( $type->[0] eq 'T_DIRECTIVE' ) {
@{$token}{qw(name value)} = ( $1, $2 );
}
else {
$token->{'value'} = $1;
}
push @tokens, $token;
next match;
}
}
return \@tokens;
}
#
# Mail::Alias::Reader::Token->tokenize($buf)
#
# Returns an ARRAY of tokens parsed from the given text buffer.
#
# This method tokenizes in two stages; first, it performs a high-level sweep of
# any statements not inside double quotes, though while grabbing double-quoted
# statements and holding onto them for a later second pass. During this second
# tokenization pass, performed for each double-quoted statement found and in the
# order of first-stage tokenization, statements containing spaces are parsed.
#
# Since this method is intended to be used on a single line of input, a T_BEGIN
# and T_END token comes as the first and the last token returned, respectively.
#
sub tokenize {
my ( $class, $buf ) = @_;
#
# When parsing token data contained within double quotes, the following
# escape sequence patterns and substitutions are iterated over for each
# double quoted expression, performing unescaping where necessary.
#
my %WHITESPACE = (
'r' => "\r",
'n' => "\n",
't' => "\t"
);
my @STRING_ESCAPE_SEQUENCES = (
[ qr/\\(0\d*)/ => sub { pack 'W', oct($1) } ],
[ qr/\\(x[0-9a-f]+)/ => sub { pack 'W', hex("0$1") } ],
[ qr/\\([rnt])/ => sub { $WHITESPACE{$1} } ],
[ qr/\\([^rnt])/ => sub { $1 } ]
);
#
# Perform first stage tokenization on the input.
#
my $tokens = $class->tokenize_for_types( $buf, @TOKEN_TYPES );
foreach my $token ( @{$tokens} ) {
#
# Perform second stage tokenization on any T_STRING tokens found. As the aliases(5)
# format lacks a string literal type, a second pass is required to parse the quote
# delimited string out for a more specific type.
#
if ( $token->isa('T_STRING') ) {
$token->{'value'} =~ s/^"(.*)"$/$1/s;
#
# Parse for any escape sequences that may be present.
#
foreach my $sequence (@STRING_ESCAPE_SEQUENCES) {
my ( $pattern, $subst ) = @{$sequence};
$token->{'value'} =~ s/$pattern/$subst->()/seg;
}
#
# Create a new token from the second pass parsing step for the string
# contents, copying the data directly into the existing token (so as to
# not lose the previous reference).
#
%{$token} = %{$class->tokenize_for_types( $token->{'value'}, @TOKEN_STRING_TYPES )->[0]};
}
}
return [
$class->new('T_BEGIN'),
@{$tokens},
$class->new('T_END')
];
}
1;
__END__
=head1 AUTHOR
Erin Schoenhals E<lt>erin@cpanel.netE<gt>
=head1 COPYRIGHT
Copyright(c) 2011 cPanel, Inc. All rights reserved.