package Catalyst::Model::SWISH;
use strict;
use warnings;
use base qw( Catalyst::Model );
use Carp;
use SWISH::API::Object;
use Data::Dump qw( dump );
use MRO::Compat;
use mro 'c3';
use Data::Pageset;
use Time::HiRes qw( time );
use Sort::SQL;

our $VERSION = '0.06';

__PACKAGE__->mk_accessors(
    qw( swish swish_error pages_per_set page_size indexes debug ));

=head1 NAME

Catalyst::Model::SWISH - Catalyst model for Swish-e

=head1 SYNOPSIS

 # in your Controller
 sub search : Local
 {
    my ($self, $c) = @_;
    my ($pager, $results, $query, $order, $total, $stime, $btime) = 
     $c->model('SWISH')->search(
        query         => $c->request->params->{q},
        page          => $c->request->params->{page} || 0,
        page_size     => $c->request->params->{itemsperpage} || 0,
        limit_to      => 'swishtitle',
        limit_high    => 'm',
        limit_low     => 'a',
        order_by      => 'swishrank desc swishtitle asc'
     );
    $c->stash(search => 
        {
            results  => $results,
            pager    => $pager,
            query    => $query,
            order    => $order,
            hits     => $total,
            search_time => $stime,
            build_time  => $btime
        }
    );
 }
 

=head1 DESCRIPTION

Catalyst::Model::SWISH provides an easy interface to 
SWISH::API::Object for searching
Swish-e indexes (http://www.swish-e.org/). It is similar to and inspired by
Catalyst::Model::Xapian.

=head1 CONFIGURATION

The configuration value is passed directly to SWISH::API::Object->new()
so see the SWISH::API::Object documentation for possible key/value options.

=over

=item indexes

Path(s) to the Swish-e index(es). Defaults to <MyApp>/index.swish-e.

=item page_size

Default page sizes for L<Data::Pageset>. Defaults to 10.

=back

You may set config options either in your root class config() method prior to
setup(), using your Model class name as the key, or set them directly in
your Model class config() method.

Examples:

 # in MyApp.pm
 MyApp->config(
    'MyApp::Model::SWISH' => { 
        'indexes' => MyApp->path_to('some/where/index.swish-e')->stringify
        }
    );
    
 # same thing in MyApp/Model/SWISH.pm
 MyApp::Model::SWISH->config(
    'indexes' => MyApp->path_to('some/where/index.swish-e')->stringify
    );
 


=head1 METHODS

=head2 new

Constructor is called automatically by Catalyst at startup.

=cut

__PACKAGE__->config(
    pages_per_set => 10,
    page_size     => 10,
);

sub new {
    my ( $class, $c ) = @_;
    my $self = $class->next::method($c);

    # this config merging pointless if the default __PACKAGE__->config()
    # works above.

    #    my %config = (
    #        indexes => $self->config->{indexes}
    #            || $c->config->{$class}->{indexes}
    #            || $c->config->{home} . '/index.swish-e',
    #        pages_per_set => $self->config->{pages_per_set} || 10
    #    );
    #
    #    $c->merge_config_hashes( \%config, $self->config ) if $self->config;
    #    $c->merge_config_hashes( \%config, $c->config->{$class} )
    #        if exists $c->config->{$class};
    #
    #    # page_size == 0 is a valid value, so we must use defined()
    #    unless ( defined $config{page_size} ) {
    #        if ( defined $self->config->{page_size} ) {
    #            $config{page_size} = $self->config->{page_size};
    #        }
    #        elsif ( defined $c->config->{$class}->{page_size} ) {
    #            $config{page_size} = $c->config->{$class}->{page_size};
    #        }
    #        else {
    #            $config{page_size} = 10;
    #        }
    #    }

    #    $self->config( \%config );

    $self->{app_class} = ref($c) || $c;    # for logging
    $self->{indexes} ||= $c->path_to('index.swish-e') . '';    # stringify

    $self->debug
        and $self->{app_class}
        ->log->debug( sprintf( "%s: %s", ref($self), dump $self ) );

    return $self;
}

=head2 search( I<opts> )

Perform a search on the index.

In array context, returns (in order):

=over

=item 

a L<Data::Pageset> object

=item

an arrayref of SWISH::API::Result objects.

=item

an arrayref of parsed query terms

=item

an arrayref of property sort order, where each array item is a hashref like:

 { property => asc | desc }

=item

the total number of hits

=item

the search time

=item

the build time

=back

In scalar context, returns a hashref with the same values, with keys:

=over

=item pager

=item results

=item query

=item order

=item hits

=item search_time

=item build_time

=back

I<opts> require a C<query> name/value pair at minimum. Other
valid params include:

=over

=item page

Which page to start on. Used in cooperation with C<page_size> set in new().

=item order_by

Sort results by a property other than rank.

=item limit_to

Property name to limit results by.

=item limit_high

High value for C<limit_to>.

=item limit_low

Low value for C<limit_to>.

=back

=cut

sub search {
    my $self = shift;
    my %opts = @_;

    $opts{query} or croak "query required";

    my ( $swish, $search, $search_time, $build_time, $start_time, $results,
        $pager, $search_results );

    $search_results = { results => [], hits => 0 };

    $swish = $self->{swish} || $self->connect;
    $search = $swish->new_search_object;
    croak( $self->swish_error ) if $self->_check_err;
    $self->set_search_opts( $search, \%opts );
    croak( $self->swish_error ) if $self->_check_err;

    $start_time = time();
    $results    = $search->execute( $opts{query} );
    croak( $self->swish_error ) if $self->_check_err;

    if ( $self->debug ) {
        $self->{app_class}->log->debug(
            sprintf(
                "%s: query '%s': %d hits",
                ref($self), $opts{query}, $results->hits
            )
        );
        croak( $self->swish_error ) if $self->_check_err;
    }

    $search_time = sprintf( '%0.4f', time() - $start_time );

    if ( $results->hits ) {
        my $start       = ( $opts{page} - 1 ) * $opts{page_size};
        my $build_start = time();
        $self->seek_result( $results, $start ) unless $start > $results->hits;
        croak( $self->swish_error ) if $self->_check_err;
        $search_results = $self->get_results( $results, \%opts );
        croak( $self->swish_error ) if $self->_check_err;

        unless ( $opts{page_size} == 0 ) {

            $pager = Data::Pageset->new(
                {   total_entries    => $search_results->{hits},
                    entries_per_page => $opts{page_size},
                    current_page     => $opts{page},
                    pages_per_set    => $opts{pages_per_set},
                    mode             => 'slide',
                }
            );

        }

        $build_time = sprintf( '%0.4f', time() - $build_start );
    }

    return wantarray
        ? (
        $pager,
        $search_results->{results},
        [ $results->parsed_words( $swish->indexes->[0] ) ],
        Sort::SQL->string2array( $opts{order_by} || 'swishrank desc' ),
        $search_results->{hits},
        $search_time,
        $build_time
        )
        : {
        pager   => $pager,
        results => $search_results->{results},
        query   => [ $results->parsed_words( $swish->indexes->[0] ) ],
        order =>
            Sort::SQL->string2array( $opts{order_by} || 'swishrank desc' ),
        hits        => $search_results->{hits},
        search_time => $search_time,
        build_time  => $build_time
        };
}

=head2 seek_result( I<results_object>, I<start_offset> )

Calls the I<results_object> seek_result() method, setting it to
I<start_offset>.

=cut

sub seek_result {
    my ( $self, $results, $start ) = @_;
    $results->seek_result($start);
}

=head2 set_search_opts( I<search_object>, I<opts> )

I<search_object> is a SWISH::API::More::Search object.

I<opts> is a hashref. This method is called within search().
Override it to set per-search options other than the defaults.

=cut

sub set_search_opts {
    my $self   = shift;
    my $search = shift or croak "Search object required";
    my $opts   = shift or croak "opts hashref required";

    $opts->{page} ||= 1;
    $opts->{page_size}
        = defined( $opts->{page_size} )
        ? $opts->{page_size}
        : $self->config->{page_size};
    $opts->{pages_per_set} ||= $self->config->{pages_per_set};

    if ( $opts->{limit_to} ) {
        defined $opts->{limit_high}
            or croak "limit_high required with limit_to";
        defined $opts->{limit_low}
            or croak "limit_low required with limit_to";

        $search->set_search_limit( $opts->{limit_to}, $opts->{limit_low},
            $opts->{limit_high} );
        if ( $self->_check_err ) {
            croak( $self->swish_error );
            return;
        }
    }
    if ( $opts->{order_by} ) {
        $search->set_sort( $opts->{order_by} );
    }

    return $opts;
}

=head2 get_results( I<results_object>, I<opts> )

Loops over I<results_object> calling next_result().
I<opts> is the same hashref passed to set_search_opts().

Returns a hashref with the following key pairs:

=over

=item hits

Total number of hits for query.

=item count

Total number of results in current search.

=item results

Arrayref of result objects.

=back

=cut

sub get_results {
    my $self    = shift;
    my $results = shift or croak "SWISH::API::Object::Results required";
    my $opts    = shift or croak "search opts hashref required";
    my $count   = 0;
    my @r;
    while ( my $r = $results->next_result ) {
        push( @r, $r );
        if ( ++$count >= $opts->{page_size} && $opts->{page_size} != 0 ) {
            last;
        }
    }
    return { hits => $results->Hits, count => $count, results => \@r };
}

sub _check_err {
    my $self = shift;

    if ( $self->{swish}->error ) {
        $self->swish_error(
                  ref($self) . ": "
                . $self->{swish}->error_string . ": "
                . $self->{swish}->last_error_msg );
        return 1;
    }
    return 0;
}

=head2 connect

Calling connect() will DESTROY the cached SWISH::API::Object object and re-cache
it, essentially re-opening the Swish-e index.

B<NOTE:> SWISH::API::Object actually makes this unnecessary in most cases,
since it inherits from SWISH::API::Stat.

Returns the SWISH::API::Object instance.

=cut

sub connect {
    my $self = shift;
    $self->{swish} = SWISH::API::Object->new(
        indexes => $self->indexes,
        debug   => $self->debug
    );
    croak $self->swish_error if $self->_check_err;

    # use RankScheme 1 if the index supports it
    my $rs = $self->{swish}->header_value( $self->{swish}->indexes->[0],
        'IgnoreTotalWordCountWhenRanking' );
    if ( !$rs ) {
        $self->{swish}->rank_scheme(1);
    }

    return $self->{swish};
}

1;

__END__

=head1 AUTHOR

Peter Karman <perl@peknet.com>

Thanks to Atomic Learning, Inc for sponsoring the development of this module.

=head1 LICENSE

This library is free software. You may redistribute it and/or modify it under
the same terms as Perl itself.


=head1 SEE ALSO

http://www.swish-e.org/, SWISH::API::Object

=cut