package Lingua::GL::Stemmer;
$Lingua::GL::Stemmer::VERSION = '0.02';
use 5.006;
use strict;
use warnings;
# Accented letters used by the rule tables below. Each is a one-character
# string holding a code point in the 0x80-0xFF range (the Latin-1 position
# of the letter), so the tables stay independent of the source encoding.
my $aa = "\xe1";    # a with acute
my $ea = "\xe9";    # e with acute
my $ia = "\xed";    # i with acute
my $oa = "\xf3";    # o with acute
my $ua = "\xfa";    # u with acute
my $at = "\xe3";    # a with tilde
my $ot = "\xf5";    # o with tilde
my $nt = "\xf1";    # n with tilde
my $ac = "\xe2";    # a with circumflex
my $ec = "\xea";    # e with circumflex
my $cc = "\xe7";    # c with cedilla
# Maps a reduction step name (plural, femin, augment, ...) to its rule table.
my %rule;

# Plural reduction rules.
#
# Entry format (shared by all suffix tables):
#     suffix => [ min_stem, replacement ]
# strip() rewrites a word ending in `suffix` only when at least `min_stem`
# characters precede the suffix; the suffix is then replaced by
# `replacement`. Longer suffixes are tried before shorter ones.
# stem() runs this step only for words ending in "s".
$rule{plural} = {
    "ns"  => [ 1, "n" ],
    "${ot}es" => [ 3, "${ot}n" ],
    "${at}es" => [ 1, "${at}o" ],
    "ais" => [ 1, "al" ],
    "${ea}is" => [ 2, "el" ],
    "eis" => [ 2, "el" ],
    "${oa}is" => [ 2, "ol" ],
    "ois" => [ 2, "ol" ],
    "${ia}s"  => [ 2, "il" ],
    "les" => [ 2, "l" ],
    "res" => [ 3, "r" ],
    "s"   => [ 2, "" ],
};

# Feminine reduction rules: suffix => [ min_stem, replacement ].
# A matching feminine ending is replaced by the listed masculine ending,
# provided at least `min_stem` characters precede it.
# stem() runs this step only for words ending in an unaccented "a".
# NOTE(review): because of that guard, the two entries keyed on a bare
# "${at}" and "${aa}" cannot match when called through stem(); they are
# reachable only by calling strip('femin', ...) directly -- confirm intent.
$rule{femin} = {
    "ona" => [ 3, "${oa}n" ],
    "oa" => [ 3, "${oa}n" ],
    "ora" => [ 3, "or" ],
    "na" => [ 4, "no" ],
    "inha" => [ 3, "inho" ],
    "i${nt}a" => [ 3, "i${nt}o" ],
    "esa" => [ 3, "${ea}s" ],
    "osa" => [ 3, "oso" ],
    "${ia}aca" => [ 3, "${ia}aco" ],
    "ica" => [ 3, "ico" ],
    "ada" => [ 3, "ado" ],
    "ida" => [ 3, "ido" ],
    "${ia}da" => [ 3, "ido" ],
    "ana" => [ 2, "${aa}n" ],
    "${aa}ria" => [ 3, "${aa}rio" ],
    "ima" => [ 3, "imo" ],
    "iva" => [ 3, "ivo" ],
    "eira" => [ 3, "eiro" ],
    "${at}" => [ 2, "${at}o" ],
    "${aa}" => [ 2, "${at}n" ],
};

# Augmentative / diminutive / superlative reduction rules:
# suffix => [ min_stem, replacement ].
# Almost every entry simply deletes the suffix (empty replacement); the
# "quinho" / "qui${nt}o" entries restore a final "c" on the stem.
# The suffix is removed only when at least `min_stem` characters precede it.
$rule{augment} = {
    "d${ia}ssimo" => [ 5, '' ],
    "d${ia}simo" => [ 5, '' ],
    "abil${ia}ssimo" => [ 5,'' ],
    "abil${ia}simo" => [ 5,'' ],
    "${ia}ssimo" => [ 3,'' ],
    "${ia}simo" => [ 3,'' ],
    "${ea}simo" => [ 3,'' ],
    "${ea}sima" => [ 3,'' ],
    "${ea}rrimo" => [ 4,'' ],
    "${ea}rrima" => [ 4,'' ],
    "zinho" => [ 2,'' ],
    "ci${nt}o" => [ 2,'' ],
    "a${cc}o" => [ 4, '' ],
    "a${cc}a" => [ 4, '' ],
    "azo" => [ 4, '' ],
    "aza" => [ 4, '' ],
    "ad${at}o" => [ 4, '' ],
    "acho" => [ 2, '' ],
    "acha" => [ 2, '' ],
    "adinho" => [ 3, '' ],
    "adi${nt}o" => [ 3, '' ],
    "alh${aa}m" => [ 4, '' ],
    "alh${at}o" => [ 4, '' ],
    "all${aa}n" => [ 4, '' ],
    "allo" => [ 4, '' ],
    "alla" => [ 4, '' ],
    "z${at}o" => [ 2,'' ],
    "z${oa}n" => [ 2,'' ],
    "zom" => [ 2,'' ],
    "${aa}n" => [ 4, '' ],
    "${oa}n" => [ 3, '' ],
    "${at}o" => [ 3, '' ],
    "arra" => [ 3,'' ],
    "astro" => [ 3,'' ],
    "${aa}zio" => [ 3,'' ],
    "echo" => [ 3,'' ],
    "echa" => [ 3,'' ],
    "edela" => [ 3,'' ],
    "ela" => [ 4,'' ],
    "elo" => [ 4,'' ],
    "eta" => [ 3,'' ],
    "ete" => [ 3,'' ],
    "ica" => [ 3,'' ],
    "id${at}o" => [ 3,'' ],
    "quinho" => [ 4, "c" ],
    "qui${nt}o" => [ 4, "c" ],
    "uinho" => [ 4,'' ],
    "ui${nt}o" => [ 4,'' ],
    "inho" => [ 3,'' ],
    "i${nt}o" => [ 3,'' ],
    "ito" => [ 3, '' ],
    "ocho" => [ 4, '' ],
    "ocha" => [ 4, '' ],
    "oide" => [ 3, '' ],
    "ola" => [ 3, '' ],
    "olo" => [ 3, '' ],
    "ote" => [ 3, '' ],
    "ota" => [ 3, '' ],
    "u${cc}a" => [ 4,'' ],
    "ucha" => [ 3,'' ],
    "ucho" => [ 3,'' ],
    "uco" => [ 4,'' ],
    "uza" => [ 4,'' ],
    "uxa" => [ 3,'' ],
};


# Noun (derivational) suffix reduction rules:
# suffix => [ min_stem, replacement ].
# The suffix is rewritten only when at least `min_stem` characters precede
# it. Most entries delete the suffix; "bil" is rewritten to "vel" and the
# "queiro" / "quice" entries restore a final "c" on the stem.
$rule{noun} = {
    "abilidade" => [ 5, "" ],
    "${aa}bel" => [ 2, "" ],
    "able" => [ 2, "" ],
    "aci" => [ 3, "" ],
    "a${cc}" => [ 3, "" ],
    "adeiro" => [ 3, "" ],
    "ador" => [ 3, "" ],
    "ado" => [ 2, "" ],
    "agem" => [ 3, "" ],
    "age" => [ 3, "" ],
    "alismo" => [ 4, "" ],
    "al${ia}stico" => [ 3, "" ],
    "alista" => [ 5, "" ],
    "alizado" => [ 4, "" ],
    "alizaci" => [ 5, "" ],
    "aliza${cc}" => [ 5, "" ],
    "alizaz" => [ 5, "" ],
    "al" => [ 4, "" ],
    "ancia" => [ 4, "" ],
    "${aa}ncia" => [ 4, "" ],
    "${ac}ncia" => [ 4, "" ],
    "ano" => [ 4, "" ],
    "ante" => [ 2, "" ],
    "ario" => [ 3, "" ],
    "${aa}rio" => [ 3, "" ],
    "${aa}stico" => [ 4, "" ],
    "ativo" => [ 4, "" ],
    "atizado" => [ 4, "" ],
    "atizaci" => [ 4, "" ],
    "atiza${cc}" => [ 4, "" ],
    "atizaz" => [ 4, "" ],
    "atoria" => [ 5, "" ],
    "at${oa}ria" => [ 5, "" ],
    "atorio" => [ 3, "" ],
    "at${oa}rio" => [ 3, "" ],
    "${aa}utico" => [ 4, "" ],
    "ico" => [ 4, "" ],
    "auta" => [ 5, "" ],
    "${aa}vel" => [ 2, "" ],
    "axe" => [ 3, "" ],
    "az" => [ 3, "" ],
    "bel" => [ 5, "" ],
    "bil" => [ 0, "vel" ],
    "ble" => [ 5, "" ],
    "cionista" => [ 5, "" ],
    "edeiro" => [ 3, "" ],
    "eiro" => [ 3, "" ],
    "edouro" => [ 3, "" ],
    "edor" => [ 3, "" ],
    "dor" => [ 2, "" ],
    "encialista" => [ 4, "" ],
    "encial" => [ 5, "" ],
    "${ec}ncia" => [ 3, "" ],
    "encia" => [ 3, "" ],
    "${ea}ncia" => [ 3, "" ],
    "ense" => [ 3, "" ],
    "ente" => [ 4, "" ],
    "erio" => [ 6, "" ],
    "${ea}rio" => [ 6, "" ],
    "esco" => [ 4, "" ],
    "${ec}utico" => [ 4, "" ],
    "${ea}utico" => [ 4, "" ],
    "eza" => [ 3, "" ],
    "ez" => [ 4, "" ],
    "${ia}aco" => [ 3, "" ],
    "ial" => [ 3, "" ],
    "iamento" => [ 4, "" ],
    "amento" => [ 3, "" ],
    "imento" => [ 3, "" ],
    "emento" => [ 3, "" ],
    "mento" => [ 6, "" ],
    "${ia}bel" => [ 5, "" ],
    "ible" => [ 5, "" ],
    "icionista" => [ 4, "" ],
    "iza${cc}" => [ 5, "" ],
    "izaci" => [ 5, "" ],
    "izaz" => [ 5, "" ],
    "ice" => [ 4, "" ],
    "ici" => [ 3, "" ],
    "i${cc}" => [ 3, "" ],
    "iz" => [ 3, "" ],
    "idade" => [ 4, "" ],
    "ideiro" => [ 3, "" ],
    "ideira" => [ 3, "" ],
    "ido" => [ 3, "" ],
    "idor" => [ 4, "" ],
    "inal" => [ 3, "" ],
    "ional" => [ 4, "" ],
    "ionar" => [ 5, "" ],
    "ionista" => [ 5, "" ],
    "ismo" => [ 3, "" ],
    "ista" => [ 3, "" ],
    "${ia}vel" => [ 5, "" ],
    "ividade" => [ 5, "" ],
    "ivo" => [ 4, "" ],
    "izado" => [ 5, "" ],
    "or" => [ 3, "" ],
    "oria" => [ 3, "" ],
    "or${ia}a" => [ 4, "" ],
    "oso" => [ 3, "" ],
    "queiro" => [ 3, "c" ],
    "quice" => [ 4, "c" ],
    "rio" => [ 5, "" ],
    "sor" => [ 2, "" ],
    "tico" => [ 3, "" ],
    "tivo" => [ 4, "" ],
    "tizado" => [ 4, "" ],
    "tiza${cc}" => [ 5, "" ],
    "tizaci" => [ 5, "" ],
    "tizaz" => [ 5, "" ],
    "tor" => [ 5, "" ],
    "ual" => [ 3, "" ],
    "uoso" => [ 3, "" ],
    "ura" => [ 4, "" ],
    "vel" => [ 5, "" ],
};


# Verb (inflectional) suffix reduction rules:
# suffix => [ min_stem, replacement ].
# The suffix is rewritten only when at least `min_stem` characters precede
# it. Most entries delete the suffix; "guem" leaves "g" and "u${ia}a"
# leaves "u" on the stem.
#
# Suffixes are listed WITHOUT a final "s": stem() removes a trailing "s" in
# the plural step before the verb step runs, so e.g. "...asedes" reaches
# this table as "...asede".
$rule{verb} = {
    "aba"  => [ 2, "" ],
    "abade" => [ 2, "" ],
    "${aa}bade" => [ 2, "" ],
    "abamo" => [ 2, "" ],
    "${aa}bamo" => [ 2, "" ],
    "aban" => [ 2, "" ],
    "ache" => [ 2, "" ],
    "ade" => [ 2, "" ],
    "ai" => [ 2, "" ],
    "am" => [ 2, "" ],
    "amo" => [ 2, "" ],
    "an" => [ 2, "" ],
    "ando" => [ 2, "" ],
    "ar" => [ 2, "" ],
    "ara" => [ 2, "" ],
    "ar${aa}" => [ 2, "" ],
    "arade" => [ 2, "" ],
    "${aa}rade" => [ 2, "" ],
    "aram" => [ 2, "" ],
    "ar${aa}m" => [ 2, "" ],
    "aramo" => [ 2, "" ],
    "${aa}ramo" => [ 2, "" ],
    "ar${aa}n" => [ 2, "" ],
    "ar${at}o" => [ 2, "" ],
    "arde" => [ 2, "" ],
    "are" => [ 2, "" ],
    "arei" => [ 2, "" ],
    "${aa}rei" => [ 2, "" ],
    "arem" => [ 2, "" ],
    "aremo" => [ 2, "" ],
    "aria" => [ 2, "" ],
    "ar${ia}a" => [ 2, "" ],
    "ariade" => [ 2, "" ],
    "ar${ia}ade" => [ 2, "" ],
    "ariam" => [ 2, "" ],
    "ariamo" => [ 2, "" ],
    "ar${ia}amo" => [ 2, "" ],
    "ar${ia}ei" => [ 2, "" ],
    "armo" => [ 2, "" ],
    "${aa}rom" => [ 2, "" ],
    "aron" => [ 2, "" ],
    "ase" => [ 2, "" ],
    "asede" => [ 2, "" ],
    "${aa}sede" => [ 2, "" ],
    "asemo" => [ 2, "" ],
    "${aa}semo" => [ 2, "" ],
    "asen" => [ 2, "" ],
    "asse" => [ 2, "" ],
    "${aa}ssei" => [ 2, "" ],
    "assem" => [ 2, "" ],
    "${aa}ssemo" => [ 2, "" ],
    "aste" => [ 2, "" ],
    "ava" => [ 2, "" ],
    "avam" => [ 2, "" ],
    "${aa}vamo" => [ 2, "" ],
    "avan" => [ 2, "" ],
    "${aa}vei" => [ 2, "" ],
    "ear" => [ 4, "" ],
    "ede" => [ 1, "" ],
    "ei" => [ 3, "" ],
    "em" => [ 2, "" ],
    "emo" => [ 2, "" ],
    "en" => [ 2, "" ],
    "endo" => [ 1, "" ],
    "eou" => [ 5, "" ],
    "er" => [ 1, "" ],
    "era" => [ 1, "" ],
    "er${aa}" => [ 1, "" ],
    "erade" => [ 1, "" ],
    "${ea}rade" => [ 1, "" ],
    "eram" => [ 1, "" ],
    "er${aa}m" => [ 1, "" ],
    "eramo" => [ 1, "" ],
    "${ea}ramo" => [ 1, "" ],
    "${ec}ramo" => [ 1, "" ],
    "er${aa}n" => [ 1, "" ],
    "er${at}o" => [ 1, "" ],
    "erde" => [ 1, "" ],
    "ere" => [ 1, "" ],
    "erei" => [ 1, "" ],
    "${ec}rei" => [ 1, "" ],
    "erem" => [ 1, "" ],
    "eremo" => [ 1, "" ],
    "eria" => [ 1, "" ],
    "er${ia}a" => [ 1, "" ],
    "eriade" => [ 1, "" ],
    "er${ia}ade" => [ 1, "" ],
    "eriam" => [ 1, "" ],
    "eriamo" => [ 1, "" ],
    "er${ia}amo" => [ 1, "" ],
    "erian" => [ 1, "" ],
    "er${ia}an" => [ 1, "" ],
    "er${ia}ei" => [ 1, "" ],
    "ermo" => [ 1, "" ],
    "${ec}rom" => [ 1, "" ],
    "eron" => [ 1, "" ],
    "ese" => [ 1, "" ],
    # s-less forms, matching "asede"/"isede": these are what actually
    # reaches this step once the plural step has dropped the final "s".
    "esede" => [ 1, "" ],
    "${ea}sede" => [ 1, "" ],
    # Original spellings with the final "s"; they can only match when
    # strip('verb', ...) is called directly. Kept for compatibility.
    "esedes" => [ 1, "" ],
    "${ea}sedes" => [ 1, "" ],
    "esemo" => [ 1, "" ],
    "${ea}semo" => [ 1, "" ],
    "esen" => [ 1, "" ],
    "esse" => [ 1, "" ],
    "${ec}ssede" => [ 1, "" ],
    "${ec}ssei" => [ 1, "" ],
    "essem" => [ 1, "" ],
    "${ec}ssemo" => [ 1, "" ],
    "este" => [ 1, "" ],
    "eu" => [ 1, "" ],
    "guem" => [ 1, "g" ],
    "i" => [ 1, "" ],
    "ia" => [ 1, "" ],
    "${ia}a" => [ 1, "" ],
    "iade" => [ 1, "" ],
    "${ia}ade" => [ 1, "" ],
    "iam" => [ 1, "" ],
    "iamo" => [ 1, "" ],
    "${ia}amo" => [ 1, "" ],
    "ian" => [ 1, "" ],
    "${ia}an" => [ 1, "" ],
    "iava" => [ 1, "" ],
    "iche" => [ 1, "" ],
    "ide" => [ 1, "" ],
    "${ia}do" => [ 3, "" ],
    "${ia}ei" => [ 1, "" ],
    "im" => [ 1, "" ],
    "imo" => [ 3, "" ],
    "in" => [ 3, "" ],
    "indo" => [ 3, "" ],
    "iona" => [ 3, "" ],
    "ir" => [ 3, "" ],
    "ira" => [ 3, "" ],
    "ir${aa}" => [ 3, "" ],
    "irade" => [ 3, "" ],
    "${ia}rade" => [ 3, "" ],
    "iram" => [ 3, "" ],
    "ir${aa}m" => [ 3, "" ],
    "${ia}ram" => [ 3, "" ],
    "iramo" => [ 3, "" ],
    "${ia}ramo" => [ 3, "" ],
    "ir${aa}n" => [ 3, "" ],
    "ir${at}o" => [ 2, "" ],
    "irde" => [ 2, "" ],
    "ire" => [ 3, "" ],
    "irei" => [ 3, "" ],
    "irem" => [ 3, "" ],
    "iremo" => [ 3, "" ],
    "iria" => [ 3, "" ],
    "ir${ia}a" => [ 3, "" ],
    "iriade" => [ 3, "" ],
    "ir${ia}ade" => [ 3, "" ],
    "iriam" => [ 3, "" ],
    "iriamo" => [ 3, "" ],
    "ir${ia}amo" => [ 3, "" ],
    "irian" => [ 3, "" ],
    "ir${ia}an" => [ 3, "" ],
    "ir${ia}ei" => [ 3, "" ],
    "irmo" => [ 3, "" ],
    "${ia}rom" => [ 3, "" ],
    "iron" => [ 3, "" ],
    "ise" => [ 3, "" ],
    "isede" => [ 3, "" ],
    "${ia}sede" => [ 3, "" ],
    "isemo" => [ 3, "" ],
    "${ia}semo" => [ 3, "" ],
    "isen" => [ 3, "" ],
    "isse" => [ 3, "" ],
    "${ia}ssede" => [ 3, "" ],
    "${ia}ssei" => [ 3, "" ],
    "issem" => [ 3, "" ],
    "${ia}ssemo" => [ 3, "" ],
    "iste" => [ 4, "" ],
    "itar" => [ 5, "" ],
    "iu" => [ 3, "" ],
    "izar" => [ 3, "" ],
    "omo" => [ 3, "" ],
    "ondo" => [ 3, "" ],
    "ou" => [ 3, "" ],
    "tizar" => [ 4, "" ],
    "uei" => [ 3, "" ],
    "u${ia}a" => [ 5, "u" ],
};

# Accent removal map: accented character => unaccented replacement.
# Unlike the suffix tables, values here are plain strings; strip('accent')
# replaces every occurrence of each key anywhere in the word.
# NOTE(review): $ac (a with circumflex) is declared and used in the noun
# table but has no entry here, so it is left untouched by this step --
# confirm whether that omission is intentional.
$rule{accent} = {
    $aa => 'a',
    $ea => 'e',
    $ia => 'i',
    $oa => 'o',
    $ua => 'u',
    $at => 'a',
    $ot => 'o',
    $ec => 'e',
    $cc => 'c',
    $nt => 'n',
};

# Final-vowel reduction rules: suffix => [ min_stem, replacement ].
# Removes a trailing "a", "e" or "o" when at least three characters precede
# it, rewrites a trailing "bil" to "vel" and a trailing "gue" to "g" (both
# needing at least two preceding characters).
$rule{vowel} = {
    "bil" => [ 2, "vel" ],
    "gue" => [ 2, "g" ],
    "a" => [ 3, "" ],
    "e" => [ 3, "" ],
    "o" => [ 3, "" ],
};

# strip($cmd, $word) -- apply a single reduction step and return the result.
#
# $cmd  : name of the step to run:
#           'accent'  replace every character that is a key of
#                     $rule{accent} by its mapped replacement;
#           'adv'     remove a word-final "mente" when at least four
#                     characters precede it;
#           other     treat $rule{$cmd} as a suffix table whose entries are
#                     suffix => [ min_stem, replacement ] and apply the
#                     longest suffix that matches with at least `min_stem`
#                     characters in front of it. Only one rule is applied.
# $word : the word to reduce (not modified; a new string is returned).
#
# Returns the reduced word, or $word unchanged when nothing applies or
# $cmd names no known step.
#
# The ($$) prototype is kept so existing call sites parse exactly as before.
sub strip($$) {
    my ($cmd, $word) = @_;

    if ($cmd eq 'accent') {
        my $map = $rule{accent};
        for my $char (keys %{$map}) {
            $word =~ s/\Q$char\E/$map->{$char}/g;
        }
        return $word;
    }

    if ($cmd eq 'adv') {
        # Anchored at the end of the word: "mente" is an adverbial *suffix*.
        # Without the anchor an inner "mente" (e.g. "experimentemo") would
        # be cut out of the middle of the word.
        $word =~ s/(.{4,})mente$/$1/;
        return $word;
    }

    my $table = $rule{$cmd};
    return $word unless $table;

    # Longest suffix first, so "adinho" wins over "inho", etc.
    for my $suffix (sort { length $b <=> length $a } keys %{$table}) {
        my ($min_stem, $replacement) = @{ $table->{$suffix} };
        # \Q..\E keeps the suffix literal even if a table entry ever
        # contains a regex metacharacter.
        my $pattern = qr/^(.{$min_stem,})\Q$suffix\E$/;
        last if $word =~ s/$pattern/$1$replacement/;
    }
    return $word;
}


# stem(@words) or stem(\@words) -- stem each word and return the stems.
#
# Accepts either a flat list of words or, when the first argument is a
# reference, that array reference (any further arguments are then ignored).
# Each word is copied before processing, so the caller's data is untouched.
#
# Steps, in order: plural (only if the word ends in "s"), femin (only if it
# then ends in "a"), augment, adv, noun, verb, vowel, accent.
#
# Returns the list of stems in list context, an array reference otherwise.
#
# NOTE(review): the femin guard tests for an unaccented final "a", so femin
# rules keyed on an accented final letter are never tried from here.
sub stem {
    my $words = ref($_[0]) ? $_[0] : \@_;
    my @always = qw/augment adv noun verb vowel accent/;

    my @stems;
    for my $original (@{$words}) {
        my $word = $original;

        if ($word =~ /s$/) {
            $word = strip('plural', $word);
        }
        if ($word =~ /a$/) {
            $word = strip('femin', $word);
        }
        for my $step (@always) {
            $word = strip($step, $word);
        }

        push @stems, $word;
    }

    return wantarray ? @stems : \@stems;
}

1;
__END__
# Module documentation (POD) follows.

=head1 NAME

Lingua::GL::Stemmer - Galician Stemmer

=head1 SYNOPSIS

  use Lingua::GL::Stemmer;

  Lingua::GL::Stemmer::stem(\@words);

  # or

  Lingua::GL::Stemmer::stem(@words);

=head1 DESCRIPTION

Galician is an endangered language spoken in the northwest region of Spain. Galician is morphologically similar to Portuguese, but its phonetics differ greatly. Because of this morphological similarity, the Portuguese stemming algorithm can be adopted to stem Galician texts.

See L<Lingua::PT::Stemmer> for a sketch of the stemming algorithm, and L<http://bvg.udc.es/recursos_lingua/stemming.html> for stemming rules.

=head1 SEE ALSO

L<Lingua::PT::Stemmer>

Stemming rules
L<http://bvg.udc.es/recursos_lingua/stemming.html>

=head1 COPYRIGHT

xern E<lt>xern@cpan.orgE<gt>

This module is free software; you can redistribute it or modify it under the same terms as Perl itself.

=cut