From Code to Community: Sponsoring The Perl and Raku Conference 2025 Learn more

# Header section
%{
use warnings;
use Scalar::Util qw(blessed);
use OPTIMADE::Filter::AndOr;
use OPTIMADE::Filter::Boolean;
use OPTIMADE::Filter::Comparison;
use OPTIMADE::Filter::Known;
use OPTIMADE::Filter::ListComparison;
use OPTIMADE::Filter::Negation;
use OPTIMADE::Filter::Property;
use OPTIMADE::Filter::Zip;
our $allow_LIKE_operator = 0;
%}
%%
# Rules section
# The top-level 'filter' rule
filter: expression ;
# Values
ordered_constant: string | number ;
unordered_constant: TRUE
{
return OPTIMADE::Filter::Boolean->new( 1 );
}
| FALSE
{
return OPTIMADE::Filter::Boolean->new( '' );
}
;
value: unordered_constant | ordered_value ;
ordered_value: ordered_constant | property ;
value_list_entry: value
{
return [ '=', $_[1] ];
}
| value_eq_rhs
{
return [ $_[1]->operator, $_[1]->left ];
}
| value_rel_comp_rhs
{
return [ $_[1]->operator, $_[1]->left ];
}
| fuzzy_string_op_rhs
{
return [ $_[1]->operator, $_[1]->left ];
}
;
value_list: value_list_entry
{
return [ $_[1] ];
}
| value_list comma value_list_entry
{
push @{$_[1]}, $_[3];
return $_[1];
}
;
value_zip: value_list_entry colon value_list_entry
{
return [ $_[1], $_[3] ];
}
| value_zip colon value_list_entry
{
push @{$_[1]}, $_[3];
return $_[1];
}
;
value_zip_list: value_zip
{
return [ $_[1] ];
}
| value_zip_list comma value_zip
{
push @{$_[1]}, $_[3];
return $_[1];
}
;
# Expressions
expression: expression_clause
| expression_clause OR expression
{
return OPTIMADE::Filter::AndOr->new( @_[1..$#_] );
}
;
expression_clause: expression_phrase
| expression_phrase AND expression_clause
{
return OPTIMADE::Filter::AndOr->new( @_[1..$#_] );
}
;
expression_phrase: comparison
| openingbrace expression closingbrace
{
return $_[2];
}
| NOT comparison
{
return OPTIMADE::Filter::Negation->new( $_[2] );
}
| NOT openingbrace expression closingbrace
{
return OPTIMADE::Filter::Negation->new( $_[3] );
}
;
comparison: constant_first_comparison | property_first_comparison ;
constant_first_comparison: ordered_constant value_op_rhs
{
$_[2]->unshift_operand( $_[1] );
return $_[2];
}
| unordered_constant value_eq_rhs
{
$_[2]->unshift_operand( $_[1] );
return $_[2];
}
;
property_first_comparison: property
{
my $cmp = OPTIMADE::Filter::Comparison->new( '=' );
$cmp->left( $_[1] );
$cmp->right( OPTIMADE::Filter::Boolean->new( 1 ) );
return $cmp;
}
| property value_op_rhs
{
$_[2]->unshift_operand( $_[1] );
return $_[2];
}
| property known_op_rhs
{
$_[2]->property( $_[1] );
return $_[2];
}
| property fuzzy_string_op_rhs
{
$_[2]->unshift_operand( $_[1] );
return $_[2];
}
| property set_op_rhs
{
$_[2]->property( $_[1] );
return $_[2];
}
| property set_zip_op_rhs
{
$_[2]->unshift_property( $_[1] );
return $_[2];
}
| property length_op_rhs
{
$_[2]->property( $_[1] );
return $_[2];
}
;
value_op_rhs: value_eq_rhs | value_rel_comp_rhs ;
value_eq_rhs: equality_operator value
{
my $cmp = OPTIMADE::Filter::Comparison->new( $_[1] );
$cmp->push_operand( $_[2] );
return $cmp;
}
;
value_rel_comp_rhs: relative_comparison_operator ordered_value
{
my $cmp = OPTIMADE::Filter::Comparison->new( $_[1] );
$cmp->push_operand( $_[2] );
return $cmp;
}
;
known_op_rhs: IS KNOWN
{
return OPTIMADE::Filter::Known->new( 1 );
}
| IS UNKNOWN
{
return OPTIMADE::Filter::Known->new( 0 );
}
;
fuzzy_string_op_rhs: CONTAINS value
{
my $cmp = OPTIMADE::Filter::Comparison->new( $_[1] );
$cmp->push_operand( $_[2] );
return $cmp;
}
| STARTS value
{
my $cmp = OPTIMADE::Filter::Comparison->new( $_[1] );
$cmp->push_operand( $_[2] );
return $cmp;
}
| STARTS WITH value
{
my $cmp = OPTIMADE::Filter::Comparison->new( "$_[1] $_[2]" );
$cmp->push_operand( $_[3] );
return $cmp;
}
| ENDS value
{
my $cmp = OPTIMADE::Filter::Comparison->new( $_[1] );
$cmp->push_operand( $_[2] );
return $cmp;
}
| ENDS WITH value
{
my $cmp = OPTIMADE::Filter::Comparison->new( "$_[1] $_[2]" );
$cmp->push_operand( $_[3] );
return $cmp;
}
| LIKE value
{
my $cmp = OPTIMADE::Filter::Comparison->new( $_[1] );
$cmp->push_operand( $_[2] );
return $cmp;
}
;
set_op_rhs: HAS value
{
my $lc = OPTIMADE::Filter::ListComparison->new( $_[1] );
$lc->values( [ [ '=', $_[2] ] ] );
return $lc;
}
| HAS equality_operator value
{
my $lc = OPTIMADE::Filter::ListComparison->new( $_[1] );
$lc->values( [ [ $_[2], $_[3] ] ] );
return $lc;
}
| HAS relative_comparison_operator ordered_value
{
my $lc = OPTIMADE::Filter::ListComparison->new( $_[1] );
$lc->values( [ [ $_[2], $_[3] ] ] );
return $lc;
}
| HAS fuzzy_string_op_rhs
{
my $lc = OPTIMADE::Filter::ListComparison->new( $_[1] );
$lc->values( [ [ $_[2]->operator, $_[2]->left ] ] );
return $lc;
}
| HAS ALL value_list
{
my $lc = OPTIMADE::Filter::ListComparison->new( "$_[1] $_[2]" );
$lc->values( $_[3] );
return $lc;
}
| HAS ANY value_list
{
my $lc = OPTIMADE::Filter::ListComparison->new( "$_[1] $_[2]" );
$lc->values( $_[3] );
return $lc;
}
| HAS ONLY value_list
{
my $lc = OPTIMADE::Filter::ListComparison->new( "$_[1] $_[2]" );
$lc->values( $_[3] );
return $lc;
}
;
set_zip_op_rhs: property_zip_addon HAS value_zip
{
$_[1]->operator( $_[2] );
$_[1]->values( [ $_[3] ] );
return $_[1];
}
| property_zip_addon HAS ONLY value_zip_list
{
$_[1]->operator( "$_[2] $_[3]" );
$_[1]->values( $_[4] );
return $_[1];
}
| property_zip_addon HAS ALL value_zip_list
{
$_[1]->operator( "$_[2] $_[3]" );
$_[1]->values( $_[4] );
return $_[1];
}
| property_zip_addon HAS ANY value_zip_list
{
$_[1]->operator( "$_[2] $_[3]" );
$_[1]->values( $_[4] );
return $_[1];
}
;
property_zip_addon: colon property
{
my $zip = OPTIMADE::Filter::Zip->new;
$zip->push_property( $_[2] );
return $zip;
}
| property_zip_addon colon property
{
$_[1]->push_property( $_[3] );
return $_[1];
}
;
length_op_rhs: LENGTH value
{
my $cmp = OPTIMADE::Filter::ListComparison->new( $_[1] );
$cmp->values( [ [ '=', $_[2] ] ] );
return $cmp;
}
| LENGTH operator value
{
my $cmp = OPTIMADE::Filter::ListComparison->new( $_[1] );
$cmp->values( [ [ $_[2], $_[3] ] ] );
return $cmp;
}
;
# Property
property: identifier
{
return OPTIMADE::Filter::Property->new( $_[1] );
}
| property dot identifier
{
push @{$_[1]}, $_[3];
return $_[1];
}
;
# Separators
openingbrace: '(' ;
closingbrace: ')' ;
dot: '.' ;
comma: ',' ;
colon: ':' ;
# Comparison operator tokens
operator: equality_operator | relative_comparison_operator ;
equality_operator: '='
| '!' '='
{
return join( '', @_[1..$#_] );
}
;
relative_comparison_operator: '<'
| '<' '='
{
return join( '', @_[1..$#_] );
}
| '>'
| '>' '='
{
return join( '', @_[1..$#_] );
}
;
%%
# Footer section
sub _Error
{
my( $self ) = @_;
close $self->{USER}{FILEIN} if $self->{USER}{FILEIN};
my $msg = "$0: syntax error at line $self->{USER}{LINENO}, " .
"position $self->{USER}{CHARNO}";
if( $self->YYData->{INPUT} ) {
$self->YYData->{INPUT} =~ s/\n$//;
die "$msg: '" . $self->YYData->{INPUT} . "'.\n";
} else {
die "$msg.\n";
}
}
sub _Lexer
{
my( $self ) = @_;
# If the line is empty and the input is originating from the file,
# another line is read.
if( !$self->YYData->{INPUT} && $self->{USER}{FILEIN} ) {
my $filein = $self->{USER}{FILEIN};
$self->YYData->{INPUT} = <$filein>;
$self->{USER}{LINENO} = -1 unless exists $self->{USER}{LINENO};
$self->{USER}{LINENO}++;
$self->{USER}{CHARNO} = 0;
}
$self->YYData->{INPUT} =~ s/^(\s+)//;
$self->{USER}{CHARNO} += length( $1 ) if defined $1;
# Escaped double quote or backslash are detected here and returned
# as is to the caller in order to be detected as syntax errors.
if( $self->YYData->{INPUT} =~ s/^(\\"|\\\\)// ) {
$self->{USER}{CHARNO} += length( $1 );
return( $1, $1 );
}
# Handling strings
if( $self->YYData->{INPUT} =~ s/^"// ) {
$self->{USER}{CHARNO} ++;
my $string = '';
while( 1 ) {
if( $self->YYData->{INPUT} =~
s/^([A-Za-z_0-9 \t!#\$\%&\'\(\)\*\+,\-\.\/\:;<=>\?@\[\]\^`\{\|\}\~\P{ASCII}]+)// ) {
$self->{USER}{CHARNO} += length( $1 );
$string .= $1;
} elsif( $self->YYData->{INPUT} =~ s/^\\([\\"])// ) {
$self->{USER}{CHARNO} ++;
$string .= $1;
next;
} elsif( $self->YYData->{INPUT} =~ s/^"// ) {
$self->{USER}{CHARNO} ++;
return( 'string', $string );
} else {
return( undef, undef );
}
}
}
# Handling identifiers
if( $self->YYData->{INPUT} =~ s/^([a-z_][a-z0-9_]*)// ) {
$self->{USER}{CHARNO} += length( $1 );
return( 'identifier', $1 );
}
# Handling textual operators and other literals
if( $self->YYData->{INPUT} =~ s/^(AND|NOT|OR|
IS|UNKNOWN|KNOWN|
CONTAINS|STARTS|ENDS|WITH|
LENGTH|HAS|ALL|ONLY|ANY|
TRUE|FALSE)//x ) {
$self->{USER}{CHARNO} += length( $1 );
return( $1, $1 );
}
# Handling LIKE operator if allowed
if( $allow_LIKE_operator && $self->YYData->{INPUT} =~ s/^(LIKE)// ) {
$self->{USER}{CHARNO} += length( $1 );
return( $1, $1 );
}
# Handling numbers
if( $self->YYData->{INPUT} =~ s/^([+-]?
(\d+\.?\d*|\.\d+)
([eE][+-]?\d+)?)//x ) {
$self->{USER}{CHARNO} += length( $1 );
return( 'number', $1 );
}
my $char = substr( $self->YYData->{INPUT}, 0, 1 );
if( $char ne '' ) {
$self->YYData->{INPUT} = substr( $self->YYData->{INPUT}, 1 );
}
$self->{USER}{CHARNO}++;
return( $char, $char );
}
sub Run
{
my( $self, $filename ) = @_;
open $self->{USER}{FILEIN}, $filename;
my $result = $self->YYParse( yylex => \&_Lexer, yyerror => \&_Error );
close $self->{USER}{FILEIN};
return $result;
}
sub parse_string
{
my( $self, $string ) = @_;
$self->YYData->{INPUT} = $string;
$self->{USER}{LINENO} = 0;
$self->{USER}{CHARNO} = 0;
return $self->YYParse( yylex => \&_Lexer, yyerror => \&_Error );
}
sub modify
{
my $node = shift;
my $code = shift;
if( blessed $node && $node->can( 'modify' ) ) {
return $node->modify( $code, @_ );
} elsif( ref $node eq 'ARRAY' ) {
return [ map { modify( $_, $code, @_ ) } @$node ];
} else {
return $code->( $node, @_ );
}
}
1;