package LucyX::Search::WildcardQuery;
use strict;
use warnings;
use base qw( Lucy::Search::Query );
use Carp;
use Scalar::Util qw( blessed );
use LucyX::Search::WildcardCompiler;

our $VERSION = '0.06';

=head1 NAME

LucyX::Search::WildcardQuery - Lucy query extension


 my $query = LucyX::Search::WildcardQuery->new(
    term    => 'green*',
    field   => 'color',
 my $hits = $searcher->hits( query => $query );


LucyX::Search::WildcardQuery extends the 
Lucy::QueryParser syntax
to support wildcards. This code is similar to the sample PrefixQuery
code in the Lucy distribution and the KinoSearch::Search::WildCardQuery
and Search::Query::Dialect::KSx::WildcardQuery module on CPAN.

=head1 METHODS

This class is a subclass of Lucy::Search::Query. Only new or overridden
methods are documented here.


# Inside-out member vars
my %term;
my %field;
my %regex;
my %prefix;
my %suffix;
my %lex_terms;

=head2 new( I<args> )

Create a new WildcardQuery object. I<args> must contain key/value pairs
for C<field> and C<term>.


sub new {
    my ( $class, %args ) = @_;
    my $term  = delete $args{term};
    my $field = delete $args{field};
    my $self  = $class->SUPER::new(%args);
    confess("'term' param is required")
        unless defined $term;
    confess("Invalid term: '$term'")
        unless $term =~ /[\*\?]/;
    confess("'field' param is required")
        unless defined $field;
    $term{$$self}  = $term;
    $field{$$self} = $field;
    return $self;

sub _build_regex {
    my ( $self, $term ) = @_;
    $term = quotemeta($term);  # turn into a regexp that matches a literal str
    $term =~ s/\\\*/.*/g;          # convert wildcards into regex
    $term =~ s/\\\?/.?/g;          # convert wildcards into regex
    $term =~ s/(?:\.\*){2,}/.*/g;  # eliminate multiple consecutive wild cards
    $term =~ s/(?:\.\?){2,}/.?/g;  # eliminate multiple consecutive wild cards
    $term =~ s/^/^/;    # unless $term =~ s/^\.\*//;    # anchor the regexp to
        =~ s/\z/\\z/;  # unless $term =~ s/\.\*\z//;    # the ends of the term
    $regex{$$self} = qr/$term/;

    # get the literal prefix of the regexp, if any.
    if ($regex{$$self} =~ m<^
            (?:    # prefix for qr//'s, without allowing /i :
                \(\? ([a-hj-z]*) (?:-[a-z]*)?:
            (\\[GA]|\^) # anchor
            ([^#\$()*+.?[\]\\^]+) # literal pat (no metachars or comments)
            my ( $mod, $anchor, $prefix ) = ( $1 || '', $2, $3 );
            $anchor eq '^' and $mod =~ /m/ and last;
            for ($prefix) {
                $mod =~ /x/ and s/\s+//g;
            $prefix{$$self} = $prefix;

    if ( $term =~ m/\.[\?\*](\w+)/ ) {
        my $suffix = $1;
        $suffix{$$self} = $suffix;


=head2 get_term

=head2 get_field

Retrieve the value set in new().

=head2 get_regex

Retrieve the qr// object representing I<term>.

=head2 get_prefix

Retrieve the literal string (if any) that precedes the wildcards
in I<term>.

=head2 get_suffix

Retrieve the literal string (if any) that follows the wildcards
in I<term>.


sub get_term   { my $self = shift; return $term{$$self} }
sub get_field  { my $self = shift; return $field{$$self} }
sub get_regex  { my $self = shift; return $regex{$$self} }
sub get_prefix { my $self = shift; return $prefix{$$self} }
sub get_suffix { my $self = shift; return $suffix{$$self} }

=head2 add_lex_term( I<term> )

Push I<term> onto the stack of lexicon terms that this Query matches.


sub add_lex_term {
    my $self = shift;
    my $t    = shift;
    croak "term required" unless defined $t;

=head2 get_lex_terms

Returns array ref of terms in the lexicons that this
query matches.


sub get_lex_terms {
    my $self = shift;
    return [ keys %{ $lex_terms{$$self} } ];

    my $self = shift;
    delete $term{$$self};
    delete $field{$$self};
    delete $prefix{$$self};
    delete $suffix{$$self};
    delete $regex{$$self};
    delete $lex_terms{$$self};

=head2 equals

Returns true (1) if the object represents the same kind of query
clause as another WildcardQuery.

NOTE: Currently a NOTWildcardQuery and a WildcardQuery object will
evaluate as equal if they have the same terma and field. This is a bug.


sub equals {
    my ( $self, $other ) = @_;
    return 0 unless blessed($other);
    return 0
        unless $other->isa( blessed($self) );
    return 0 unless $self->get_field eq $other->get_field;
    return 0 unless $self->get_term  eq $other->get_term;
    return 1;

=head2 to_string

Returns the query clause the object represents.


sub to_string {
    my $self = shift;
    return "$field{$$self}:$term{$$self}";

=head2 make_compiler

Returns a LucyX::Search::WildcardCompiler object.


sub make_compiler {
    my $self        = shift;
    my %args        = @_;
    my $subordinate = delete $args{subordinate};    # new in Lucy 0.2.2
    $args{parent} = $self;
    my $compiler = LucyX::Search::WildcardCompiler->new(%args);

    # unlike Search::Query synopsis, normalize()
    # is called internally in $compiler.
    # This should be fixed in a C re-write.
    #$compiler->normalize unless $subordinate;

    return $compiler;



=head1 AUTHOR

Peter Karman, C<< <karman at> >>

=head1 BUGS

Please report any bugs or feature requests to C<bug-lucyx-search-wildcardquery at>, or through
the web interface at L<>.  I will be notified, and then you'll
automatically be notified of progress on your bug as I make changes.

=head1 SUPPORT

You can find documentation for this module with the perldoc command.

    perldoc LucyX::Search::WildcardQuery

You can also look for information at:

=over 4

=item * RT: CPAN's request tracker


=item * AnnoCPAN: Annotated CPAN documentation


=item * CPAN Ratings


=item * Search CPAN




Copyright 2011 Peter Karman.

This program is free software; you can redistribute it and/or modify it
under the terms of either: the GNU General Public License as published
by the Free Software Foundation; or the Artistic License.

See for more information.