From Code to Community: Sponsoring The Perl and Raku Conference 2025 Learn more

use strict;
use Carp 'croak';
use Exporter 'import';
use Scalar::Util 'blessed';
our @EXPORT_OK = qw/compile validate/;
our $VERSION = '1.6';
# Unavailable as custom validation names
my %builtin = map +($_,1), qw/
type
default
onerror
rmwhitespace
values scalar sort unique
keys unknown missing
func
/;
my %type_vals = map +($_,1), qw/scalar hash array any/;
my %unknown_vals = map +($_,1), qw/remove reject pass/;
my %missing_vals = map +($_,1), qw/create reject ignore/;
sub _length {
my($exp, $min, $max) = @_;
+{ _analyze_minlength => $min, _analyze_maxlength => $max, func => sub {
my $got = ref $_[0] eq 'HASH' ? keys %{$_[0]} : ref $_[0] eq 'ARRAY' ? @{$_[0]} : length $_[0];
(!defined $min || $got >= $min) && (!defined $max || $got <= $max) ? 1 : { expected => $exp, got => $got };
}}
}
# Basically the same as ( regex => $arg ), but hides the regex error
sub _reg {
my $reg = $_[0];
( type => 'scalar', _analyze_regex => "$reg", func => sub { $_[0] =~ $reg ? 1 : { got => $_[0] } } );
}
our $re_num = qr/^-?(?:0|[1-9][0-9]*)(?:\.[0-9]+)?(?:[eE][+-]?[0-9]+)?$/;
my $re_int = qr/^-?(?:0|[1-9][0-9]*)$/;
our $re_uint = qr/^(?:0|[1-9][0-9]*)$/;
my $re_fqdn = qr/(?:[a-zA-Z0-9][\w-]*\.)+[a-zA-Z][a-zA-Z0-9-]{1,25}\.?/;
my $re_ip4_digit = qr/(?:0|[1-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])/;
my $re_ip4 = qr/($re_ip4_digit\.){3}$re_ip4_digit/;
# Doesn't allow IPv4-mapped-IPv6 addresses or other fancy stuff.
my $re_ip6 = qr/(?:[0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|(?:[0-9a-fA-F]{1,4}:){1,7}:|(?:[0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|(?:[0-9a-fA-F]{1,4}:){1,5}(?::[0-9a-fA-F]{1,4}){1,2}|(?:[0-9a-fA-F]{1,4}:){1,4}(?::[0-9a-fA-F]{1,4}){1,3}|(?:[0-9a-fA-F]{1,4}:){1,3}(?::[0-9a-fA-F]{1,4}){1,4}|(?:[0-9a-fA-F]{1,4}:){1,2}(?::[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:(?:(?::[0-9a-fA-F]{1,4}){1,6})|:(?:(?::[0-9a-fA-F]{1,4}){1,7}|:)/;
my $re_ip = qr/(?:$re_ip4|$re_ip6)/;
my $re_domain = qr/(?:$re_fqdn|$re_ip4|\[$re_ip6\])/;
# Also used by the TUWF::Misc::kv_validate()
our $re_email = qr/^[-\+\.#\$=\w]+\@$re_fqdn$/;
our $re_weburl = qr/^https?:\/\/$re_domain(?::[1-9][0-9]{0,5})?(?:\/[^\s<>"]*)$/;
our %default_validations = (
regex => sub {
my $reg = shift;
# Error objects should be plain data structures so that they can easily
# be converted to JSON for debugging. We have to stringify $reg in the
# error object to ensure that.
+{ type => 'scalar', _analyze_regex => "$reg", func => sub { $_[0] =~ $reg ? 1 : { regex => "$reg", got => $_[0] } } }
},
enum => sub {
my @l = ref $_[0] eq 'HASH' ? sort keys %{$_[0]} : ref $_[0] eq 'ARRAY' ? @{$_[0]} : ($_[0]);
my %opts = map +($_,1), @l;
+{ type => 'scalar', func => sub { $opts{ (my $v = $_[0]) } ? 1 : { expected => \@l, got => $_[0] } } }
},
minlength => sub { _length $_[0], $_[0] },
maxlength => sub { _length $_[0], undef, $_[0] },
length => sub { _length($_[0], ref $_[0] eq 'ARRAY' ? @{$_[0]} : ($_[0], $_[0])) },
anybool => { type => 'any', default => 0, func => sub { $_[0] = $_[0] ? 1 : 0; 1 } },
undefbool => { type => 'any', default => undef, func => sub { $_[0] = $_[0] ? 1 : 0; 1 } },
jsonbool => { type => 'any', func => sub {
my $r = $_[0];
blessed $r && (
$r->isa('JSON::PP::Boolean')
|| $r->isa('JSON::XS::Boolean')
|| $r->isa('Types::Serialiser::Boolean')
|| $r->isa('Cpanel::JSON::XS::Boolean')
|| $r->isa('boolean')
) ? 1 : {};
} },
num => { _reg $re_num },
int => { _reg $re_int }, # implies num
uint => { _reg $re_uint }, # implies num
min => sub {
my $min = shift;
+{ num => 1, _analyze_min => $min, func => sub { $_[0] >= $min ? 1 : { expected => $min, got => $_[0] } } }
},
max => sub {
my $max = shift;
+{ num => 1, _analyze_max => $max, func => sub { $_[0] <= $max ? 1 : { expected => $max, got => $_[0] } } }
},
range => sub { +{ min => $_[0][0], max => $_[0][1] } },
ascii => { _reg qr/^[\x20-\x7E]*$/ },
ipv4 => { _reg $re_ip4 },
ipv6 => { _reg $re_ip6 },
ip => { _reg $re_ip },
email => { _reg($re_email), maxlength => 254 },
weburl => { _reg($re_weburl), maxlength => 65536 }, # the maxlength is a bit arbitrary, but better than unlimited
);
# Loads a hashref of validations and a schema definition, and converts it into
# an object with:
# {
# name => $name_or_undef,
# validations => [ $recursive_compiled_object, .. ],
# schema => $modified_schema_without_validations,
# known_keys => { $key => 1, .. } # Extracted from 'keys', Used for the 'unknown' validation
# }
sub _compile {
my($validations, $schema, $rec) = @_;
my(%top, @val);
my @keys = keys %{$schema->{keys}} if $schema->{keys};
for(sort keys %$schema) {
if($builtin{$_} || /^_analyze_/) {
$top{$_} = $schema->{$_};
next;
}
my $t = $validations->{$_} || $default_validations{$_};
croak "Unknown validation: $_" if !$t;
croak "Recursion limit exceeded while resolving validation '$_'" if $rec < 1;
$t = ref $t eq 'HASH' ? $t : $t->($schema->{$_});
my $v = _compile($validations, $t, $rec-1);
$v->{name} = $_;
push @val, $v;
}
# Inherit some builtin options from validations
for my $t (@val) {
if($top{type} && $t->{schema}{type} && $top{type} ne $t->{schema}{type}) {
croak "Incompatible types, the schema specifies '$top{type}' but validation '$t->{name}' requires '$t->{schema}{type}'" if $schema->{type};
croak "Incompatible types, '$t->[0]' requires '$t->{schema}{type}', but another validation requires '$top{type}'";
}
exists $t->{schema}{$_} and !exists $top{$_} and $top{$_} = delete $t->{schema}{$_}
for qw/default onerror rmwhitespace type scalar unknown missing sort unique/;
push @keys, keys %{ delete $t->{known_keys} };
push @keys, keys %{ $t->{schema}{keys} } if $t->{schema}{keys};
}
# Compile sub-schemas
$top{keys} = { map +($_, compile($validations, $top{keys}{$_})), keys %{$top{keys}} } if $top{keys};
$top{values} = compile($validations, $top{values}) if $top{values};
# XXX: Flattening recursive validations would be faster and may simplify
# the code a bit, but makes error objects harder to interpret.
# XXX: As an optimization, it's possible to remove double validations (e.g.
# multiple invocations of the same validation with the same options due to
# validations calling each other). Care must be taken that this won't
# affect error objects (i.e. only subsequent invocations should be
# removed).
return {
validations => \@val,
schema => \%top,
known_keys => { map +($_,1), @keys },
};
}
sub compile {
my($validations, $schema) = @_;
return $schema if ref $schema eq __PACKAGE__;
my $c = _compile $validations, $schema, 64;
$c->{schema}{type} //= 'scalar';
croak "Invalid value for 'type': $c->{schema}{type}" if !$type_vals{$c->{schema}{type}};
$c->{schema}{rmwhitespace} //= 1 if $c->{schema}{type} eq 'scalar';
$c->{schema}{unknown} //= 'remove';
$c->{schema}{missing} //= 'create';
croak "Invalid value for 'unknown': $c->{schema}{unknown}" if !$unknown_vals{$c->{schema}{unknown}};
croak "Invalid value for 'missing': $c->{schema}{missing}" if !$missing_vals{$c->{schema}{missing}};
delete $c->{schema}{default} if ref $c->{schema}{default} eq 'SCALAR' && ${$c->{schema}{default}} eq 'required';
delete $c->{schema}{keys} if $c->{schema}{type} ne 'hash';
delete $c->{schema}{values} if $c->{schema}{type} ne 'array';
delete $c->{schema}{sort} if $c->{schema}{type} ne 'array';
delete $c->{schema}{unique} if $c->{schema}{type} ne 'array';
if(exists $c->{schema}{sort}) {
my $s = $c->{schema}{sort};
$c->{schema}{sort} =
ref $s eq 'CODE' ? $s
: $s eq 'str' ? sub { $_[0] cmp $_[1] }
: $s eq 'num' ? sub { $_[0] <=> $_[1] }
: croak "Unknown value for 'sort': $c->{schema}{sort}";
}
$c->{schema}{unique} = sub { $_[0] } if $c->{schema}{unique} && !ref $c->{schema}{unique} && !$c->{schema}{sort};
bless $c, __PACKAGE__;
}
sub _validate_rec {
my($c, $input) = @_;
# hash keys
if($c->{schema}{keys}) {
my @err;
for my $k (keys %{$c->{schema}{keys}}) {
my $s = $c->{schema}{keys}{$k};
if(!exists $input->{$k}) {
next if $s->{schema}{missing} eq 'ignore';
return [$input, { validation => 'missing', key => $k }] if $s->{schema}{missing} eq 'reject';
$input->{$k} = ref $s->{schema}{default} eq 'CODE' ? $s->{schema}{default}->() : $s->{schema}{default} // undef;
next if exists $s->{schema}{default};
}
my $r = _validate($s, $input->{$k});
$input->{$k} = $r->[0];
if($r->[1]) {
$r->[1]{key} = $k;
push @err, $r->[1];
}
}
return [$input, { validation => 'keys', errors => \@err }] if @err;
}
# array values
if($c->{schema}{values}) {
my @err;
for my $i (0..$#$input) {
my $r = _validate($c->{schema}{values}, $input->[$i]);
$input->[$i] = $r->[0];
if($r->[1]) {
$r->[1]{index} = $i;
push @err, $r->[1];
}
}
return [$input, { validation => 'values', errors => \@err }] if @err;
}
# validations
for (@{$c->{validations}}) {
my $r = _validate_rec($_, $input);
$input = $r->[0];
return [$input, {
# If the error was a custom 'func' object, then make that the primary cause.
# This makes it possible for validations to provide their own error objects.
$r->[1]{validation} eq 'func' && (!exists $r->[1]{result} || keys %{$r->[1]} > 2) ? %{$r->[1]} : (error => $r->[1]),
validation => $_->{name},
}] if $r->[1];
}
# func
if($c->{schema}{func}) {
my $r = $c->{schema}{func}->($input);
return [$input, { %$r, validation => 'func' }] if ref $r eq 'HASH';
return [$input, { validation => 'func', result => $r }] if !$r;
}
return [$input]
}
sub _validate_array {
my($c, $input) = @_;
return [$input] if $c->{schema}{type} ne 'array';
$input = [sort { $c->{schema}{sort}->($a,$b) } @$input ] if $c->{schema}{sort};
# Key-based uniqueness
if($c->{schema}{unique} && ref $c->{schema}{unique} eq 'CODE') {
my %h;
for my $i (0..$#$input) {
my $k = $c->{schema}{unique}->($input->[$i]);
return [$input, { validation => 'unique', index_a => $h{$k}, value_a => $input->[$h{$k}], index_b => $i, value_b => $input->[$i], key => $k }] if exists $h{$k};
$h{$k} = $i;
}
# Comparison-based uniqueness
} elsif($c->{schema}{unique}) {
for my $i (0..$#$input-1) {
return [$input, { validation => 'unique', index_a => $i, value_a => $input->[$i], index_b => $i+1, value_b => $input->[$i+1] }]
if $c->{schema}{sort}->($input->[$i], $input->[$i+1]) == 0
}
}
return [$input]
}
sub _validate_input {
my($c, $input) = @_;
# rmwhitespace (needs to be done before the 'default' test)
if(defined $input && !ref $input && $c->{schema}{type} eq 'scalar' && $c->{schema}{rmwhitespace}) {
$input =~ s/\r//g;
$input =~ s/^\s*//;
$input =~ s/\s*$//;
}
# default
if(!defined $input || (!ref $input && $input eq '')) {
return [ref $c->{schema}{default} eq 'CODE' ? $c->{schema}{default}->($input) : $c->{schema}{default}] if exists $c->{schema}{default};
return [$input, { validation => 'required' }];
}
if($c->{schema}{type} eq 'scalar') {
return [$input, { validation => 'type', expected => 'scalar', got => lc ref $input }] if ref $input;
} elsif($c->{schema}{type} eq 'hash') {
return [$input, { validation => 'type', expected => 'hash', got => lc ref $input || 'scalar' }] if ref $input ne 'HASH';
# unknown
# Each branch below makes a shallow copy of the hash, so that further
# validations can perform in-place modifications without affecting the
# input.
if($c->{schema}{unknown} eq 'remove') {
$input = { map +($_, $input->{$_}), grep $c->{known_keys}{$_}, keys %$input };
} elsif($c->{schema}{unknown} eq 'reject') {
my @err = grep !$c->{known_keys}{$_}, keys %$input;
return [$input, { validation => 'unknown', keys => \@err, expected => [ sort keys %{$c->{known_keys}} ] }] if @err;
$input = { %$input };
} else {
$input = { %$input };
}
} elsif($c->{schema}{type} eq 'array') {
$input = [$input] if $c->{schema}{scalar} && !ref $input;
return [$input, { validation => 'type', expected => $c->{schema}{scalar} ? 'array or scalar' : 'array', got => lc ref $input || 'scalar' }] if ref $input ne 'ARRAY';
$input = [@$input]; # Create a shallow copy to prevent in-place modification.
} elsif($c->{schema}{type} eq 'any') {
# No need to do anything here.
} else {
croak "Unknown type '$c->{schema}{type}'"; # Should be checked in _compile(), preferably.
}
my $r = _validate_rec($c, $input);
return $r if $r->[1];
$input = $r->[0];
_validate_array($c, $input);
}
sub _validate {
my($c, $input) = @_;
my $r = _validate_input($c, $input);
$r->[1] && exists $c->{schema}{onerror} ? [ref $c->{schema}{onerror} eq 'CODE' ? $c->{schema}{onerror}->(bless $r, 'TUWF::Validate::Result') : $c->{schema}{onerror}] : $r
}
sub validate {
my($c, $input) = ref $_[0] eq __PACKAGE__ ? @_ : (compile($_[0], $_[1]), $_[2]);
bless _validate($c, $input), 'TUWF::Validate::Result';
}
sub analyze {
TUWF::Validate::Interop::analyze($_[0]);
}
use strict;
use Carp 'croak';
# A result object contains: [$data, $error]
# In boolean context, returns whether the validation succeeded.
use overload bool => sub { !$_[0][1] };
# Returns the validation errors, or undef if validation succeeded
sub err { $_[0][1] }
# Returns the validated and normalized input, dies if validation didn't succeed.
sub data {
if($_[0][1]) {
require Data::Dumper;
my $s = Data::Dumper->new([$_[0][1]])->Terse(1)->Pair(':')->Indent(0)->Sortkeys(1)->Dump;
croak "Validation failed: $s";
}
$_[0][0]
}
# Same as 'data', but returns partially validated and normalized data if validation failed.
sub unsafe_data { $_[0][0] }
# TODO: Human-readable error message formatting
1;