# *
# *     Copyright (c) 2000-2006 Alberto Reggiori <areggiori@webweaving.org>
# *                        Dirk-Willem van Gulik <dirkx@webweaving.org>
# *
# * NOTICE
# *
# * This product is distributed under a BSD/ASF like license as described in the 'LICENSE'
# * file you should have received together with this source code. If you did not get a
# * a copy of such a license agreement you can pick up one at:
# *
# *     http://rdfstore.sourceforge.net/LICENSE
# *
# * Changes:
# *     version 0.1
# *		- first hacked version: pure perl RDQL/SquishQL top-down LL(1) parser with some extesnions:
# *                  * LIKE operator in AND clause
# *                  * free-text triple matching like (?x, ?y, %"whatever"%)
# *     version 0.2
# *    		- added SELECT DISTINCT 
# *    		- added SPARQL PREFIX support and default/built-in prefixes
# *    		- added # and // style comments
# *    		- added SPARQL QNAME like support
# *		- added ?prefix:var QName support to vars
# *		- added SPARQL CONSTRUCT support
# *		- added SPARQL $var support
# *		- added getQueryType() method
# *		- added SPARQL DESCRIBE support
# *		- fixed bug in Literal() when matching floating point numbers
# *		- updated constraints and removed AND keyword to be SPARQL compatible
# *		- added not standard RDQL/SPARQL DELETE support
# *		- added default SPARQL PREFIX op: <http://www.w3.org/2001/sw/DataAccess/operations> and PREFIX fn: <http://www.w3.org/2004/07/xpath-functions>
# *		- updated and simplified constraints productions to reflect latest SPARQL spec
# *		- constraints are now stacked into a RPN
# *		- added full SPARQL graph-patterns and grouping
# *		- added SPARQL FROM NAMED support
# *		- added SPARQL LIMIT support
# *		- added SPARQL OFFSET support
# *		- added SPARQL ORDER BY support
# *

package RDQL::Parser;
{

use vars qw ( $VERSION );
use strict;
use Carp;

$VERSION = '0.2';

sub parse ($$);
sub MatchAndEat ($$);
sub error ($$);
sub Select ($);
sub Construct ($);
sub Describe ($);
#sub Ask ($);
sub OrderBy ($);
sub Limit ($);
sub Offset ($);
sub Delete ($);
sub From ($);
sub FromNamed ($);
sub GraphPattern ($);
sub GraphAndPattern ($);
sub PatternElement ($);
sub GroupGraphPattern ($);
sub SourceGraphPattern ($);
sub OptionalGraphPattern ($);
sub Var ($);
sub URIOrQName ($);
sub Literal ($);
sub TriplePattern ($);
sub VarOrURIOrQName ($);
sub VarOrURIOrQNameOrLiteral ($);
sub Constraint ($);
sub Prefixes ($);
sub PrefixDecl ($);
sub ConditionalOrExpression ($);
sub ConditionalAndExpression ($);
sub StringEqualityExpression ($);
sub PatternLiteral ($);
sub EqualityExpression ($);
sub RelationalExpression ($);
sub AdditiveExpression ($);
sub MultiplicativeExpression ($);
sub UnaryExpression ($);
sub UnaryExpressionNotPlusMinus ($);
sub PrimaryExpression ($);
sub FunctionCall ($);
sub ArgList ($);

# some useful default prefixes
%RDQL::Parser::default_prefixes= (
	'http://www.w3.org/1999/02/22-rdf-syntax-ns#' => 'rdf',
	'http://www.w3.org/2000/01/rdf-schema#' => 'rdfs',
	'http://purl.org/rss/1.0/' => 'rss',
	'http://www.daml.org/2001/03/daml+oil#' => 'daml',
	'http://purl.org/dc/elements/1.1/' => 'dc',
	'http://purl.org/dc/terms/' => 'dcq',
	'http://xmlns.com/foaf/0.1/' => 'foaf',
	'http://www.w3.org/2001/XMLSchema#' => 'xsd',
	'http://www.w3.org/2002/07/owl#' => 'owl',
	# these two are SPARQL special - perhaps should not mix up with other namespaces? avoid to aoverride them?
	'http://www.w3.org/2001/sw/DataAccess/operations' => 'op',
	'http://www.w3.org/2004/07/xpath-functions' => 'fn'
	);

sub new {
	my $self = {
		prefixes		=>	{},
		sources			=>	[],
		from_named		=>	[],
		resultVars		=>	[],
		constructPatterns	=>	[],
		describes		=>	[],
		graphPatterns		=>	[],
		order_by		=>	[]
		};

	map {
		$self->{'prefixes'}->{ $RDQL::Parser::default_prefixes{ $_ } } = $_ ;
	} keys %RDQL::Parser::default_prefixes;

	bless $self, shift;
	};

sub MatchAndEat ($$) {
	my($class,$lit)=@_;

	# eat single line comments
	while( $class->{'query_string'} =~ s/^\s*(#|\/\/).*// ) {};

	# eat multi-line comments
	if( $class->{'query_string'} =~ s/^\s*\/\*// ) {
		while( $class->{'query_string'} !~ s/^\s*\*\/// ) {
			$class->{'query_string'} =~ s/^\s*(.)\s*//;
			};
		};

	return $class->{'query_string'} =~ s/^\s*\Q$lit\E\s*//i;
};

sub error($$) {
	my($class,$msg)=@_;
	croak "error: $msg: ".$class->{'query_string'}."\n";
};

sub parse($$) {
	my($class,$query) = @_;

	$class->{'query_string'} = $query;

	$class->{'context'}=[];
	$class->{'graph_patterns_pointer'} = []; #to check undeflow???
	
	while( MatchAndEat $class,'prefix' ) {
		PrefixDecl $class;
		};

	if( MatchAndEat $class,'select' ) {
		$class->{'queryType'} = 'SELECT';
		Select $class;
	} elsif( MatchAndEat $class,'construct' ) {
		$class->{'queryType'} = 'CONSTRUCT';
		Construct $class;
	} elsif( MatchAndEat $class,'describe' ) {
		$class->{'queryType'} = 'DESCRIBE';
		Describe $class;
	} elsif( MatchAndEat $class,'ask' ) {
		$class->{'queryType'} = 'ASK';
		#Ask $class;
	} elsif( MatchAndEat $class,'delete' ) {
		$class->{'queryType'} = 'DELETE';
		Delete $class;
	} else {
		error $class,'Expecting SELECT, CONSTRUCT, DESCRIBE, ASK or DELETE token'
			if($class->{'query_string'} ne '');
		};

	while( MatchAndEat $class,'prefix' ) {
		PrefixDecl $class;
		};

	while(	MatchAndEat $class,'source' or
		MatchAndEat $class,'from' ) {
		if( MatchAndEat $class,'named' ) {
			FromNamed $class;
		} else {
			From $class;
			};
		};

	GraphPattern $class
		if( MatchAndEat $class,'where');

	while(	MatchAndEat $class,'order' and
		MatchAndEat $class,'by' ) {
		OrderBy $class;
		};

	Limit $class
		if(MatchAndEat $class,'limit');

	Offset $class
		if(MatchAndEat $class,'offset');

	# eat this up anyway to keep legacy RDQL queries working...
        Prefixes $class
		if(MatchAndEat $class,'using');

	$class->{'query_string'} =~ s/^\s*//;
	$class->{'query_string'} =~ s/\s*$//;

	error $class,'illegal input'
		if($class->{'query_string'} ne '');

	delete($class->{'query_string'});
	delete($class->{'context'});
	delete($class->{'graph_patterns_pointer'});

	#use Data::Dumper;
	#print STDERR Dumper($class);

	return $class;
	};

sub Select($) {
	my($class) = @_;

	$class->{'distinct'} = ( MatchAndEat $class,'distinct' ) ? 1 : 0;
	push @{ $class->{'context'} }, 'select';
	if( MatchAndEat $class,'*') {
		push @{$class->{resultVars}},'*';
	} elsif( Var $class ) {
		do {
			MatchAndEat $class,',';
			} while ( Var $class );
	};
	pop @{ $class->{'context'} };
};

sub OrderBy($) {
	my($class) = @_;

	push @{ $class->{'context'} }, 'order by';

	if( MatchAndEat $class,'asc' ) {
		ConditionalOrExpression $class;

		push @{ $class->{'order_by'} }, 'ASC';
	} elsif( MatchAndEat $class,'desc' ) {
		ConditionalOrExpression $class;

		push @{ $class->{'order_by'} }, 'DESC';
	} else {
		if ( Var $class ) {
		} elsif ( FunctionCall $class ) {
		} else {
			ConditionalOrExpression $class;
			};

		push @{ $class->{'order_by'} }, 'ASC';
		};

	pop @{ $class->{'context'} };
};

sub Limit($) {
	my($class) = @_;

	push @{ $class->{'context'} }, 'limit';

	error $class,"limit requires an integer value"
		unless( Literal $class );

	error $class,"limit is invalid"
		unless(	$class->{'limit'} >= 0 );

	pop @{ $class->{'context'} };
};

sub Offset($) {
	my($class) = @_;

	push @{ $class->{'context'} }, 'offset';

	error $class,"offset requires an integer value"
		unless( Literal $class );

	error $class,"offset is invalid"
		unless(	$class->{'offset'} >= 0 );

	pop @{ $class->{'context'} };
};

sub Construct($) {
	my($class) = @_;

	$class->{'distinct'} = 0; #useless? see DBD::RDFStore driver
	push @{ $class->{'context'} }, 'construct';
	if( MatchAndEat $class,'*') {
		push @{$class->{constructPatterns}},'*';
	} elsif( TriplePattern $class ) {
	} else {
		if( MatchAndEat $class,'{' ) {
			# we do not deal with nested Groups yet...
			while ( TriplePattern $class ) {
                		MatchAndEat $class,',';
                		};

			error $class,"missing right brace"
				unless( MatchAndEat $class,'}' );
		} else {
			error $class,"missing left brace";
			};
		};
	pop @{ $class->{'context'} };
	};

sub Describe($) {
	my($class) = @_;

	$class->{'distinct'} = 0; #useless? see DBD::RDFStore driver
	push @{ $class->{'context'} }, 'describe';
	if( MatchAndEat $class,'*') {
		push @{$class->{describes}},'*';
	} elsif( VarOrURIOrQName $class ) {
		do {
			MatchAndEat $class,',';
			} while ( VarOrURIOrQName $class );
		};
	pop @{ $class->{'context'} };
	};

sub Delete($) {
	my($class) = @_;

	$class->{'distinct'} = 0;
	push @{$class->{resultVars}},'*'
		if( MatchAndEat $class,'*');
	};

sub Var($) {
	my($class) = @_;

	if($class->{'query_string'} =~ s/^\s*[\?\$]([a-zA-Z0-9_\.:]+)\s*//) {
		my $var = '?'.$1; # we force ?var style anyway
		if( $class->{'context'}->[ $#{ $class->{'context'} } ] eq 'select' ) {
			push @{$class->{resultVars}}, $var
				unless(grep /^\Q$var\E$/,@{$class->{resultVars}});
			};
		if( $class->{'context'}->[ $#{ $class->{'context'} } ] eq 'describe' ) {
			push @{$class->{describes}},$var
				unless(grep /^\Q$var\E$/,@{$class->{describes}});
			};
		if( $class->{'context'}->[ $#{ $class->{'context'} } ] eq 'triples' ) {
			push @{$class->{triple_pattern}}, $var;
		} elsif( $class->{'context'}->[ $#{ $class->{'context'} } ] eq 'constraints' ) {
			push @{ $class->{'graphPatterns'}->[$class->{'graph_patterns_pointer'}->[$#{$class->{'graph_patterns_pointer'}}]]->{constraints} }, $var;
		} elsif( $class->{'context'}->[ $#{ $class->{'context'} } ] eq 'order by' ) {
			push @{ $class->{'order_by'} }, $var;
		} elsif( $class->{'context'}->[ $#{ $class->{'context'} } ] eq 'named_graph' ) {
			$class->{'graph_name'} = $var;
		};

		return 1;
	};
	return 0;
};

sub FromNamed($) {
	my($class) = @_;

	push @{ $class->{'context'} }, 'from_named';
	if( URIOrQName $class ) {
		do {
                        MatchAndEat $class,',';
                        } while ( URIOrQName $class );
	} else {
		error $class, "malformed URI or QName";
	};
	pop @{ $class->{'context'} };
	};

sub From($) {
	my($class) = @_;

	push @{ $class->{'context'} }, 'source';
	if( URIOrQName $class ) {
		do {
                        MatchAndEat $class,',';
                        } while ( URIOrQName $class );
	} else {
		error $class, "malformed URI or QName";
	};
	pop @{ $class->{'context'} };
	};

sub URIOrQName($) {
	my($class) = @_;

	# the following covers also RDFStore/RDQL extensions for simple OR <URI1 , URI2 , URI3 ....>, 
	# <pp:ff , pp1:ff> and <"string a" , "literal b" .... "literal n">
	#if($class->{'query_string'} =~ s/^\s*((\<[^>]*\>)|([a-zA-Z0-9\-_$\.]+:[a-zA-Z0-9\-_$\.]+)|([a-zA-Z0-9\-_$\.]+:))\s*//) {
	if($class->{'query_string'} =~ s/^\s*(\<[^>]*\>)\s*//) { #not yet the above - but we are NOT RDQL compliant then - no QNames (need to fix all the DBD driver too then)
		# in the old RDQL syntax we do not deal with prefixes here yet but directly in the DBD::RDFStore driver code instead
		if( $class->{'context'}->[ $#{ $class->{'context'} } ] eq 'triples' ) {
			push @{$class->{triple_pattern}}, $1;
		} elsif( $class->{'context'}->[ $#{ $class->{'context'} } ] eq 'from_named' ) {
			push @{$class->{from_named}}, $1;
		} elsif( $class->{'context'}->[ $#{ $class->{'context'} } ] eq 'source' ) {
			push @{$class->{sources}}, $1;
		} elsif( $class->{'context'}->[ $#{ $class->{'context'} } ] eq 'constraints' ) {
			push @{ $class->{'graphPatterns'}->[$class->{'graph_patterns_pointer'}->[$#{$class->{'graph_patterns_pointer'}}]]->{constraints} }, $1;
		} elsif( $class->{'context'}->[ $#{ $class->{'context'} } ] eq 'order by' ) {
			push @{ $class->{'order_by'} }, $1;
		} elsif( $class->{'context'}->[ $#{ $class->{'context'} } ] eq 'named_graph' ) {
			$class->{'graph_name'} = $1;
		} elsif( $class->{'context'}->[ $#{ $class->{'context'} } ] eq 'describe' ) {
			push @{$class->{describes}},$1
				unless(grep /^\Q$1\E$/,@{$class->{describes}});
			};
		return 1;
	} elsif($class->{'query_string'} =~ s/^\s*([a-zA-Z0-9\-_$\.]+)?:([a-zA-Z0-9\-_$\.]+)\s*//) {
		# I am lazy, and do not want to fix DBD::RDFStore driver code too...
		my $qn;
		if($1) {
			# look up for a prefix if there
			if( exists $class->{'prefixes'}->{$1} ) {
				$qn = '<'. $class->{'prefixes'}->{$1} .$2.'>';
			} else {
				# otherwise should say unbound prefix in new SPARQL with pre-PREFIX syntax
				error $class,"Unbound prefix $1 ";
				};
		} else {
			# try to use default one
			$qn = '<'.( ( exists $class->{'prefixes'}->{'#default'} ) ? $class->{'prefixes'}->{'#default'} : $1 ).$2.'>';
			};
		if( $class->{'context'}->[ $#{ $class->{'context'} } ] eq 'triples' ) {
			push @{$class->{triple_pattern}}, $qn;
		} elsif( $class->{'context'}->[ $#{ $class->{'context'} } ] eq 'from_named' ) {
			push @{$class->{from_named}}, $qn;
		} elsif( $class->{'context'}->[ $#{ $class->{'context'} } ] eq 'source' ) {
			push @{$class->{sources}}, $qn;
		} elsif( $class->{'context'}->[ $#{ $class->{'context'} } ] eq 'constraints' ) {
			push @{ $class->{'graphPatterns'}->[$class->{'graph_patterns_pointer'}->[$#{$class->{'graph_patterns_pointer'}}]]->{constraints} }, $qn;
		} elsif( $class->{'context'}->[ $#{ $class->{'context'} } ] eq 'order by' ) {
			push @{ $class->{'order_by'} }, $qn;
		} elsif( $class->{'context'}->[ $#{ $class->{'context'} } ] eq 'named_graph' ) {
			$class->{'graph_name'} = $qn;
		} elsif( $class->{'context'}->[ $#{ $class->{'context'} } ] eq 'describe' ) {
			push @{$class->{describes}},$qn
				unless(grep /^\Q$qn\E$/,@{$class->{describes}});
			};
		return 1;
		};
	return 0;
};

sub Literal($) {
	my($class) = @_;

	if(	($class->{'query_string'} =~ s/^\s*(([0-9]+\.[0-9]*([eE][+-]?[0-9]+)?[fFdD]?)|(\.[0-9]+([eE][+-]?[0-9]+)?[fFdD]?)|([0-9]+[eE][+-]?[0-9]+[fFdD]?)|([0-9]+([eE][+-]?[0-9]+)?[fFdD]))\s*//) or
		#($class->{'query_string'} =~ s/^\s*(%?\'((([^\'\\\n\r])|(\\([ntbrf\\'\"])|([0-7][0-7?)|([0-3][0-7][0-7]))))\'%?)\s*//) or
		($class->{'query_string'} =~ s/^\s*(%?[\"\']((([^\"\'\\\n\r])|(\\([ntbrf\\'\"])|([0-7][0-7?)|([0-3][0-7][0-7])))*)[\"\'](\@([a-z0-9]+(-[a-z0-9]+)?))?%?)\s*//) or
		($class->{'query_string'} =~ s/^\s*([0-9]+)\s*//) or
		($class->{'query_string'} =~ s/^\s*(0[xX]([0-9",a-f,A-F])+)\s*//) or
		#($class->{'query_string'} =~ s/^\s*(0[0-7]*)\s*//) or
		($class->{'query_string'} =~ s/^\s*(true|false)\s*//) or
		($class->{'query_string'} =~ s/^\s*(null)\s*//) ) {
		if( $class->{'context'}->[ $#{ $class->{'context'} } ] eq 'triples' ) {
			push @{$class->{triple_pattern}}, $1;
		} elsif( $class->{'context'}->[ $#{ $class->{'context'} } ] eq 'constraints' ) {
			push @{ $class->{'graphPatterns'}->[$class->{'graph_patterns_pointer'}->[$#{$class->{'graph_patterns_pointer'}}]]->{constraints} }, $1;
		} elsif( $class->{'context'}->[ $#{ $class->{'context'} } ] eq 'order by' ) {
			push @{ $class->{'order_by'} }, $1;
		} elsif( $class->{'context'}->[ $#{ $class->{'context'} } ] eq 'named_graph' ) {
			$class->{'graph_name'} = $1;
		} elsif( $class->{'context'}->[ $#{ $class->{'context'} } ] eq 'limit' ) {
			$class->{'limit'} = $1;
		} elsif( $class->{'context'}->[ $#{ $class->{'context'} } ] eq 'offset' ) {
			$class->{'offset'} = $1;
		};
		return 1;
	};
	return 0;
};

sub GraphPattern($) {
	my($class) = @_;

	GraphAndPattern $class;
	while( MatchAndEat $class,'UNION' ) { # we might have an issue with UNION and AND 'expression' constraints if no braces - to be checked
		GraphAndPattern $class;

		push @{$class->{'graphPatterns'}}, 'UNION';
		};
	};

sub GraphAndPattern($) {
	my($class) = @_;

	# shall we check if previous on stack is an empty block, and use that one instead? Or how can we evaluate empty blocks?
	push @{$class->{'graphPatterns'}}, {
		'triplePatterns' =>	[],
		'constraints'	 =>	[],
		'optional'	 => 	( $class->{'context'}->[ $#{ $class->{'context'} } ] eq 'optional' ) ? 1 : 0
		};
	push @{ $class->{'graph_patterns_pointer'} }, $#{$class->{'graphPatterns'}};

        while( PatternElement $class ) {
		MatchAndEat $class,',';
		};

	pop @{ $class->{'graph_patterns_pointer'} };
	};

sub PatternElement($) {
	my($class) = @_;

	if( TriplePattern $class ) {
	} elsif( GroupGraphPattern $class ) {
	} elsif( SourceGraphPattern $class ) {
	} elsif( OptionalGraphPattern $class ) {
	} elsif( MatchAndEat $class,'and' ) {
		Constraint $class;

		$class->{'graphPatterns'}->[$#{$class->{'graphPatterns'}}]->{'constraints_optional'} = ( $class->{'context'}->[ $#{ $class->{'context'} } ] eq 'optional' ) ? 1 : 0;
	} else {
		return 0;
		};
	return 1;
	};

sub GroupGraphPattern($) {
	my($class) = @_;

	if( MatchAndEat $class,'{' ) {
		GraphPattern $class;

		error $class,"missing right brace"
			unless( MatchAndEat $class,'}' );

		push @{$class->{'graphPatterns'}}, 'AND';

		return 1;
	} else {
		return 0;
		};
	};

# SOURCE is just a modifier how to process a triple-pattern
sub SourceGraphPattern($) {
	my($class) = @_;

	if( MatchAndEat $class,'graph' ) {
		push @{ $class->{'context'} }, 'named_graph';

		# need to add GRAPH * - what does it really mean in the triple-pattern?
		error $class,"malformed GRAPH clause"
			unless( VarOrURIOrQName $class ); #context

		PatternElement $class;

		delete($class->{'graph_name'});

		pop @{ $class->{'context'} };

		return 1;
	} else {
		return 0;
		};
	};

# optionals are just a modifier how to process triple-patterns or blocks (triple-patterns+constraints)
# NOTE: for triple-patterns we allocate the 1st element of the array to flag (0/1) whether or not it is OPTIONAL
sub OptionalGraphPattern($) {
	my($class) = @_;

	if( MatchAndEat $class,'optional' ) {
		push @{ $class->{'context'} }, 'optional';

		PatternElement $class;

		pop @{ $class->{'context'} };

		return 1;
	} else {
		return 0;
		};
	};

sub TriplePattern($) {
	my($class) = @_;

	if( MatchAndEat $class,'(' ) {
		$class->{triple_pattern}=[ ( $class->{'context'}->[ $#{ $class->{'context'} } ] eq 'optional' ) ? 1 : 0 ];

		push @{ $class->{'context'} }, 'triples';

		error $class,"malformed subject variable, URI or QName"
			unless VarOrURIOrQName $class; #subject

		MatchAndEat $class,',';
		error $class,"malformed predicate variable, URI or QName"
			unless VarOrURIOrQName $class; #predicate

		MatchAndEat $class,',';
		error $class,"malformed object variable, URI, QName or literal"
			unless VarOrURIOrQNameOrLiteral $class; #object

		MatchAndEat $class,',';
		unless( VarOrURIOrQNameOrLiteral $class ) { #context
			push @{$class->{triple_pattern}}, $class->{'graph_name'}
				if( exists $class->{'graph_name'} );
			};

		error $class,"missing right round bracket"
			unless( MatchAndEat $class,')' );

		if(	( ( $#{ $class->{'context'} } - 1 ) >= 0 ) and
			$class->{'context'}->[ $#{ $class->{'context'} } - 1 ] eq 'construct' and
			$class->{constructPatterns}->[0] ne '*' ) {
			push @{$class->{constructPatterns}}, $class->{triple_pattern};
		} else {
			push @{$class->{'graphPatterns'}->[ $class->{'graph_patterns_pointer'}->[$#{$class->{'graph_patterns_pointer'}}] ]->{triplePatterns}}, $class->{triple_pattern};
			};

		delete($class->{triple_pattern});

		pop @{ $class->{'context'} };

		return 1;
	} else {
		return 0;
		};
	};

sub VarOrURIOrQName($) {
	my($class) = @_;

	return ( Var $class or URIOrQName $class );
};

sub VarOrURIOrQNameOrLiteral($) {
	my($class) = @_;

	return ( Var $class or URIOrQName $class or Literal $class );
};

sub Prefixes($) {
	my($class) = @_;

	while( PrefixDecl $class ) {
		MatchAndEat $class,',';
		};
};

sub PrefixDecl($) {
	my($class) = @_;
	if($class->{'query_string'} =~ s/^\s*(\w[\w\d]*)?:\s+\<([A-Za-z][^>]*)\>\s*//i) {
		return 0
			if( $1 eq 'fn' or $1 eq 'op'); #ignore overrride of special ones??
		$class->{prefixes}->{ ($1) ? $1 : '#default' }=$2;
		return 1;
	} elsif($class->{'query_string'} =~ s/^\s*(\w[\w\d]*)\s+FOR\s+\<([A-Za-z][^>]*)\>\s*//i) {
		return 0
			if(	( $1 eq 'fn' and $2 ne $class->{prefixes}->{'fn'} ) or
				( $1 eq 'op' and $2 ne $class->{prefixes}->{'op'} ) ); #ignore overrride of special ones??
		$class->{prefixes}->{$1}=$2;
		return 1;
		};
	return 0;
	};

sub Constraint($) {
	my($class) = @_;

	push @{ $class->{'context'} }, 'constraints';

	ConditionalOrExpression $class;
	while(  MatchAndEat $class,',' or
                MatchAndEat $class,'and') {
                ConditionalOrExpression $class;
        	};

	pop @{ $class->{'context'} };
	};

# we skip ConditionalXorExpression...
sub ConditionalOrExpression($) {
	my($class) = @_;

	ConditionalAndExpression $class;
	while( MatchAndEat $class,'||' ) {
		ConditionalAndExpression $class;
		if( $class->{'context'}->[ $#{ $class->{'context'} } ] eq 'order by' ) {
			push @{ $class->{'order_by'} }, '||';
		} else {
			push @{ $class->{'graphPatterns'}->[$class->{'graph_patterns_pointer'}->[$#{$class->{'graph_patterns_pointer'}}]]->{constraints} }, '||';
			};
	};
};

sub ConditionalAndExpression($) {
	my($class) = @_;

	StringEqualityExpression $class;
	while( MatchAndEat $class,'&&' ) {
		StringEqualityExpression $class;
		if( $class->{'context'}->[ $#{ $class->{'context'} } ] eq 'order by' ) {
                        push @{ $class->{'order_by'} }, '&&';
                } else {
			push @{ $class->{'graphPatterns'}->[$class->{'graph_patterns_pointer'}->[$#{$class->{'graph_patterns_pointer'}}]]->{constraints} }, '&&';
                        };
	};
};

sub StringEqualityExpression($) {
	my($class) = @_;

	EqualityExpression $class;
	my $true=1;
	while( $true ) {
		if( MatchAndEat $class,'eq' ) {
			EqualityExpression $class;
			if( $class->{'context'}->[ $#{ $class->{'context'} } ] eq 'order by' ) {
                        	push @{ $class->{'order_by'} }, 'eq';
                	} else {
				push @{ $class->{'graphPatterns'}->[$class->{'graph_patterns_pointer'}->[$#{$class->{'graph_patterns_pointer'}}]]->{constraints} }, 'eq';
                        	};
		} elsif( MatchAndEat $class,'ne' ) {
			EqualityExpression $class;
			if( $class->{'context'}->[ $#{ $class->{'context'} } ] eq 'order by' ) {
                        	push @{ $class->{'order_by'} }, 'ne';
                	} else {
				push @{ $class->{'graphPatterns'}->[$class->{'graph_patterns_pointer'}->[$#{$class->{'graph_patterns_pointer'}}]]->{constraints} }, 'ne';
                        	};
		} elsif(	( MatchAndEat $class,'=~' ) ||
				( MatchAndEat $class,'LIKE' ) ) { # pattern is like [m]/pattern/[i][m][s][x]
			PatternLiteral $class; # should some pattern literal
			if( $class->{'context'}->[ $#{ $class->{'context'} } ] eq 'order by' ) {
                        	push @{ $class->{'order_by'} }, '=~';
                	} else {
				push @{ $class->{'graphPatterns'}->[$class->{'graph_patterns_pointer'}->[$#{$class->{'graph_patterns_pointer'}}]]->{constraints} }, '=~';
                        	};
		} elsif( MatchAndEat $class,'!~' ) {
			PatternLiteral $class; # should some pattern literal
			if( $class->{'context'}->[ $#{ $class->{'context'} } ] eq 'order by' ) {
                        	push @{ $class->{'order_by'} }, '!~';
                	} else {
				push @{ $class->{'graphPatterns'}->[$class->{'graph_patterns_pointer'}->[$#{$class->{'graph_patterns_pointer'}}]]->{constraints} }, '!~';
                        	};
		} else {
			$true=0;
		};
	};
};

sub PatternLiteral($) {
	my($class) = @_;

	if( $class->{'query_string'} =~ s/([m]?\/(.*)\/[i]?[m]?[s]?[x]?)// ) {
		if( $class->{'context'}->[ $#{ $class->{'context'} } ] eq 'constraints' ) {
			push @{ $class->{'graphPatterns'}->[$class->{'graph_patterns_pointer'}->[$#{$class->{'graph_patterns_pointer'}}]]->{constraints} }, $1;
		} elsif( $class->{'context'}->[ $#{ $class->{'context'} } ] eq 'order by' ) {
                       	push @{ $class->{'order_by'} }, $1;
                        };
		};
	};

sub EqualityExpression($) {
	my($class) = @_;

	RelationalExpression $class;
	my $true=1;
	while( $true ) {
		if( MatchAndEat $class,'==' ) {
			RelationalExpression $class;
			if( $class->{'context'}->[ $#{ $class->{'context'} } ] eq 'order by' ) {
                       		push @{ $class->{'order_by'} }, '==';
                	} else {
				push @{ $class->{'graphPatterns'}->[$class->{'graph_patterns_pointer'}->[$#{$class->{'graph_patterns_pointer'}}]]->{constraints} }, '==';
                        	};
		} elsif( MatchAndEat $class,'!=' ) {
			RelationalExpression $class;
			if( $class->{'context'}->[ $#{ $class->{'context'} } ] eq 'order by' ) {
                       		push @{ $class->{'order_by'} }, '!=';
                	} else {
				push @{ $class->{'graphPatterns'}->[$class->{'graph_patterns_pointer'}->[$#{$class->{'graph_patterns_pointer'}}]]->{constraints} }, '!=';
                        	};
		} else {
			$true=0;
		};
	};
};

sub RelationalExpression($) {
	my($class) = @_;

	AdditiveExpression $class;
	if( MatchAndEat $class,'>=' or MatchAndEat $class,'>=' ) {
		AdditiveExpression $class;
		if( $class->{'context'}->[ $#{ $class->{'context'} } ] eq 'order by' ) {
                	push @{ $class->{'order_by'} }, '>=';
                } else {
			push @{ $class->{'graphPatterns'}->[$class->{'graph_patterns_pointer'}->[$#{$class->{'graph_patterns_pointer'}}]]->{constraints} }, '>=';
                       	};
	} elsif( MatchAndEat $class,'<=' or MatchAndEat $class,'<=' ) {
		AdditiveExpression $class;
		if( $class->{'context'}->[ $#{ $class->{'context'} } ] eq 'order by' ) {
                	push @{ $class->{'order_by'} }, '<=';
                } else {
			push @{ $class->{'graphPatterns'}->[$class->{'graph_patterns_pointer'}->[$#{$class->{'graph_patterns_pointer'}}]]->{constraints} }, '<=';
                       	};
	} elsif( MatchAndEat $class,'<' ) {
		AdditiveExpression $class;
		if( $class->{'context'}->[ $#{ $class->{'context'} } ] eq 'order by' ) {
                	push @{ $class->{'order_by'} }, '<';
                } else {
			push @{ $class->{'graphPatterns'}->[$class->{'graph_patterns_pointer'}->[$#{$class->{'graph_patterns_pointer'}}]]->{constraints} }, '<';
                       	};
	} elsif( MatchAndEat $class,'>' ) {
		AdditiveExpression $class;
		if( $class->{'context'}->[ $#{ $class->{'context'} } ] eq 'order by' ) {
                	push @{ $class->{'order_by'} }, '>';
                } else {
			push @{ $class->{'graphPatterns'}->[$class->{'graph_patterns_pointer'}->[$#{$class->{'graph_patterns_pointer'}}]]->{constraints} }, '>';
                       	};
	};
};

sub AdditiveExpression($) {
	my($class) = @_;

	MultiplicativeExpression $class;
	my $true=1;
	while( $true ) {
		if( MatchAndEat $class,'+' ) {
			MultiplicativeExpression $class;
			if( $class->{'context'}->[ $#{ $class->{'context'} } ] eq 'order by' ) {
                		push @{ $class->{'order_by'} }, '+';
                	} else {
				push @{ $class->{'graphPatterns'}->[$class->{'graph_patterns_pointer'}->[$#{$class->{'graph_patterns_pointer'}}]]->{constraints} }, '+';
                       		};
		} elsif( MatchAndEat $class,'-' ) {
			MultiplicativeExpression $class;
			if( $class->{'context'}->[ $#{ $class->{'context'} } ] eq 'order by' ) {
                		push @{ $class->{'order_by'} }, '-';
                	} else {
				push @{ $class->{'graphPatterns'}->[$class->{'graph_patterns_pointer'}->[$#{$class->{'graph_patterns_pointer'}}]]->{constraints} }, '-';
                       		};
		} else {
			$true=0;
		};
	};
};

sub MultiplicativeExpression($) {
	my($class) = @_;

	UnaryExpression $class;
	my $true=1;
	while( $true ) {
		if( MatchAndEat $class,'*' ) {
			UnaryExpression $class;
			if( $class->{'context'}->[ $#{ $class->{'context'} } ] eq 'order by' ) {
                		push @{ $class->{'order_by'} }, '*';
                	} else {
				push @{ $class->{'graphPatterns'}->[$class->{'graph_patterns_pointer'}->[$#{$class->{'graph_patterns_pointer'}}]]->{constraints} }, '*';
                       		};
		} elsif( MatchAndEat $class,'/' ) {
			UnaryExpression $class;
			if( $class->{'context'}->[ $#{ $class->{'context'} } ] eq 'order by' ) {
                		push @{ $class->{'order_by'} }, '/';
                	} else {
				push @{ $class->{'graphPatterns'}->[$class->{'graph_patterns_pointer'}->[$#{$class->{'graph_patterns_pointer'}}]]->{constraints} }, '/';
                       		};
		} elsif( MatchAndEat $class,'%' ) {
			UnaryExpression $class;
			if( $class->{'context'}->[ $#{ $class->{'context'} } ] eq 'order by' ) {
                		push @{ $class->{'order_by'} }, '%';
                	} else {
				push @{ $class->{'graphPatterns'}->[$class->{'graph_patterns_pointer'}->[$#{$class->{'graph_patterns_pointer'}}]]->{constraints} }, '%';
                       		};
		} else {
			$true=0;
		};
	};
};

sub UnaryExpression($) {
	my($class) = @_;

	UnaryExpressionNotPlusMinus $class;
};

sub UnaryExpressionNotPlusMinus($) {
	my($class) = @_;

	if( MatchAndEat $class,'~' ) {
		UnaryExpression $class;
		if( $class->{'context'}->[ $#{ $class->{'context'} } ] eq 'order by' ) {
                	push @{ $class->{'order_by'} }, '~';
                } else {
			push @{ $class->{'graphPatterns'}->[$class->{'graph_patterns_pointer'}->[$#{$class->{'graph_patterns_pointer'}}]]->{constraints} }, '~';
                       	};
	} elsif ( MatchAndEat $class,'!' ) {
		UnaryExpression $class;
		if( $class->{'context'}->[ $#{ $class->{'context'} } ] eq 'order by' ) {
                	push @{ $class->{'order_by'} }, '!';
                } else {
			push @{ $class->{'graphPatterns'}->[$class->{'graph_patterns_pointer'}->[$#{$class->{'graph_patterns_pointer'}}]]->{constraints} }, '!';
                       	};
	} else {
		PrimaryExpression $class;
	};
};

sub PrimaryExpression($) {
	my($class) = @_;

	if( MatchAndEat $class,'(' ) {

		ConditionalOrExpression $class;

		error $class,"missing right round bracket"
			unless( MatchAndEat $class,')' );

	} else {
		unless(	Var $class or URIOrQName $class or Literal $class ) {
			FunctionCall $class;
		};
	};
};

sub FunctionCall($) {
	my($class) = @_;

	if(	( MatchAndEat $class,'&' ) &&
		($class->{'query_string'} =~ s/^\s*([a-zA-Z0-9\-_$\.]+)?:([a-zA-Z0-9\-_$\.]+)\s*//) ) {
		#	if( $1 ne 'fn' and $1 ne 'op' );

		# look up for a prefix if there
		# NOTE: otherwise should say unbound prefix in new SPARQL with pre-PREFIX syntax
		my $qn;
		if( exists $class->{'prefixes'}->{ ($1) ? $1 : '#default' } ) {
			$qn = $class->{'prefixes'}->{ ($1) ? $1 : '#default' } . $2 ;
		} else {
			error $class,"Unsupported function call $1:$2";
			};

		if( MatchAndEat $class,'(' ) {

			ArgList $class;

			error $class,"missing right round bracket"
				unless( MatchAndEat $class,')' );

			if( $class->{'context'}->[ $#{ $class->{'context'} } ] eq 'order by' ) {
                		push @{ $class->{'order_by'} }, ( '&', $qn );
                	} else {
				push @{ $class->{'graphPatterns'}->[$class->{'graph_patterns_pointer'}->[$#{$class->{'graph_patterns_pointer'}}]]->{constraints} }, ( '&', $qn );
                       		};
			};

		return 1;
	} else {
		return 0;
                };
	};

sub ArgList($) {
	my($class) = @_;

	if( Var $class or URIOrQName $class or Literal $class ) {
		my $true=1;
		while( $true ) {
			if( MatchAndEat $class,',' ) {

				unless( Var $class or URIOrQName $class or Literal $class ) {
					$true=0;

				};
			} else {
				$true=0;
			};
		};
	};
};

# see SPARQL spec http://www.w3.org/TR/rdf-sparql-query/ - generally it can be SELECT, CONSTRUCT, DESCRIBE or ASK
sub getQueryType {
	my($class) = @_;

	return $class->{'queryType'};
	};

sub serialize {
	my($class, $fh, $syntax) = @_;

	if(	(! $syntax ) ||
                ( $syntax =~ m/N-Triples/i) ) {
		# not yet supported ?
                return
			if($#{ $class->{'graphPatterns'}->[$class->{'graph_patterns_pointer'}->[$#{$class->{'graph_patterns_pointer'}}]]->{constraints} }>=0);
		foreach my $tp ( @{ $class->{'graphPatterns'}->[$class->{'graph_patterns_pointer'}->[$#{$class->{'graph_patterns_pointer'}}]]->{triplePatterns} } ) {
			return
				if( ($#{$tp}==3) || #Quads not there yet
			            (     ($tp->[2] =~ m/^%/) && #my free-text extensions
                                          ($tp->[2] =~ m/%$/) ) );
			};

		# convert
		my @nt;
		foreach my $tp ( @{ $class->{'graphPatterns'}->[$class->{'graph_patterns_pointer'}->[$#{$class->{'graph_patterns_pointer'}}]]->{triplePatterns} } ) {
			my @tp;
			map {
				my $ff = $class->{'query_string'};
				$ff =~ s/^[\?\$](.+)$/_:$1/;
				$ff =~ s/[\$:]/-/g;
				if(	($ff =~ m/^<(([^\:]+)\:{1,2}([^>]+))>$/) &&
					(defined $2) &&
					(exists $class->{prefixes}->{$2}) ) {
					push @tp, '<'.$class->{prefixes}->{$2}.$3.'>';
				} else {
					push @tp, $ff;
					};
			} @{$tp};
			push @tp, '.';
			push @nt, join(' ',@tp);
			};
		if($fh) {
			print $fh join("\n",@nt);
			return 1;
		} else {
			return join("\n",@nt);
			};
        } else {
                croak "Unknown serialization syntax '$syntax'";
                };
	};

sub DESTROY {
	my($class) = @_;

};

1;
};

__END__

=head1 NAME

RDQL::Parser - A simple top-down LL(1) RDQL and SPARQL parser

=head1 SYNOPSIS

	use RDQL::Parser;
	my $parser = RDQL::Parser->new();
	my $query = <<QUERY;

	PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
	PREFIX rss: <http://purl.org/rss/1.0/>
        SELECT
           ?title ?link
        FROM
           <http://xmlhack.com/rss10.php>
        WHERE
	   (?item, rdf:type <rss:item>)
	   (?item, rss:title, ?title)
	   (?item, rss:link ?link)

        QUERY;

        $parser->parse($query); #parse the query

	# I.e.
	$parser = bless( {
                 'distinct' => 0,
                 'constructPatterns' => [],
                 'prefixes' => {
                                 'fn' => 'http://www.w3.org/2004/07/xpath-functions',
                                 'op' => 'http://www.w3.org/2001/sw/DataAccess/operations',
                                 'owl' => 'http://www.w3.org/2002/07/owl#',
                                 'dcq' => 'http://purl.org/dc/terms/',
                                 'dc' => 'http://purl.org/dc/elements/1.1/',
                                 'foaf' => 'http://xmlns.com/foaf/0.1/',
                                 'rdfs' => 'http://www.w3.org/2000/01/rdf-schema#',
                                 'rss' => 'http://purl.org/rss/1.0/',
                                 'rdf' => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
                                 'xsd' => 'http://www.w3.org/2001/XMLSchema#',
                                 'daml' => 'http://www.daml.org/2001/03/daml+oil#'
                               },
                 'graphPatterns' => [
                                      {
                                        'constraints' => [],
                                        'optional' => 0,
                                        'triplePatterns' => [
                                                              [
                                                                0,
                                                                '?item',
                                                                '<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>',
                                                                '<rss:item>'
                                                              ],
                                                              [
                                                                0,
                                                                '?item',
                                                                '<http://purl.org/rss/1.0/title>',
                                                                '?title'
                                                              ],
                                                              [
                                                                0,
                                                                '?item',
                                                                '<http://purl.org/rss/1.0/link>',
                                                                '?link'
                                                              ]
                                                            ]
                                      }
                                    ],
                 'sources' => [
                                '<http://xmlhack.com/rss10.php>'
                              ],
                 'describes' => [],
                 'queryType' => 'SELECT',
                 'resultVars' => [
                                   '?title',
                                   '?link'
                                 ],
               }, 'RDQL::Parser' );

	$parser->serialize(*STDOUT, 'N-Triples'); #print on STDOUT the RDQL query as N-Triples if possible (or an error)

=head1 DESCRIPTION

 RDQL::Parser - A simple top-down LL(1) RDQL and SPARQL parser - see http://www.w3.org/TR/rdf-sparql-query/ and http://www.w3.org/Submission/2004/SUBM-RDQL-20040109/

=head1 CONSTRUCTORS

=item $parser = new RDQL::Parser;

=head1 METHODS

=item parse( PARSER, QUERY )

 If use Data::Dumper(3) to actually dumpo out the content of the PARSER variable after invoching the parse() method it lokks like:

 $VAR1 = bless( {
                 'distinct' => 0,
                 'constructPatterns' => [],
                 'prefixes' => {
                                 'fn' => 'http://www.w3.org/2004/07/xpath-functions',
                                 'op' => 'http://www.w3.org/2001/sw/DataAccess/operations',
                                 'owl' => 'http://www.w3.org/2002/07/owl#',
                                 'dcq' => 'http://purl.org/dc/terms/',
                                 'dc' => 'http://purl.org/dc/elements/1.1/',
                                 'foaf' => 'http://xmlns.com/foaf/0.1/',
                                 'rdfs' => 'http://www.w3.org/2000/01/rdf-schema#',
                                 'rss' => 'http://purl.org/rss/1.0/',
                                 'rdf' => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
                                 'xsd' => 'http://www.w3.org/2001/XMLSchema#',
                                 'daml' => 'http://www.daml.org/2001/03/daml+oil#'
                               },
                 'graphPatterns' => [
                                      {
                                        'constraints' => [],
                                        'optional' => 0,
                                        'triplePatterns' => [
                                                              [
                                                                0,
                                                                '?item',
                                                                '<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>',
                                                                '<rss:item>'
                                                              ],
                                                              [
                                                                0,
                                                                '?item',
                                                                '<http://purl.org/rss/1.0/title>',
                                                                '?title'
                                                              ],
                                                              [
                                                                0,
                                                                '?item',
                                                                '<http://purl.org/rss/1.0/link>',
                                                                '?link'
                                                              ]
                                                            ]
                                      }
                                    ],
                 'sources' => [
                                '<http://xmlhack.com/rss10.php>'
                              ],
                 'describes' => [],
                 'queryType' => 'SELECT',
                 'resultVars' => [
                                   '?title',
                                   '?link'
                                 ],
               }, 'RDQL::Parser' );

=head1 NOTES

	The RDQL implementation is actually an extension of the original RDQL spec (http://www.w3.org/Submission/2004/SUBM-RDQL-20040109/)
	to allow more SQL-like Data Manipulation Language (DML) features like DELETE and INSERT - which is much more close to the original rdfdb
	query language which SquishQL/RDQL are inspired to (see http://www.guha.com/rdfdb).
	
	As well as the SPARQL one....?

=head1 SEE ALSO

DBD::RDFStore(3)

http://www.w3.org/TR/rdf-sparql-query/
http://www.w3.org/Submission/2004/SUBM-RDQL-20040109/
http://ilrt.org/discovery/2002/04/query/
http://www.hpl.hp.com/semweb/doc/tutorial/RDQL/
http://rdfstore.sourceforge.net/documentation/papers/HPL-2002-110.pdf

=head1 FAQ

=item I<What's the difference between RDQL and SquishQL?>

=item None :-) The former is a bit of an extension of the original SquishQL proposal defining a proper BNF to the query language; the only practical difference is that triple patterns in the WHERE clause are expressed in a different order s,p,o for RDQL while SquishQL uses '(p s o)' without commas. In addition the URIs are expressed with angle brackets on RDQL while SquishQL do not. For more about differences between the two languages see http://rdfstore.sourceforge.net/documentation/papers/HPL-2002-110.pdf

=item I<Is RDQL::Parser compliant to RDQL BNF?>

=item Yes

=item I<Is RDQL::Parser compliant to SquishQL syntax ?>

=item Not yet :)

=item I<What are RDQL::Parser extensions to RDQL BNF?>

=item RDQL::Parser leverage on RDFStore(3) to run proper free-text UTF-8 queries over literals; the two main extensions are

=item * LIKE operator in AND clause

=item * free-text triple matching like (?x, ?y, %"whatever"%)

=head1 AUTHOR

	Alberto Reggiori <areggiori@webweaving.org>
	Andy Seaborne <andy_seaborne@hp.com> is the original author of RDQL
	Libby Miller <libby.miller@bristol.ac.uk> is the original author of SquishQL