#!/usr/bin/perl -w

package Mail::Miner::Recogniser::Address;

# Register this recogniser in %Mail::Miner::recognisers under its own
# package name. The key is spelled "" . __PACKAGE__ because a lone
# __PACKAGE__ inside hash-subscript braces would be read as the literal
# string "__PACKAGE__" rather than the current package name.
# NOTE(review): how Mail::Miner consumes title/help/keyword is not visible
# from this file -- confirm against the registry's reader before renaming.
{
    my %registration = (
        keyword => "address",
        title   => "Physical Addresses",
        help    => "Match messages which contain an address",
    );

    $Mail::Miner::recognisers{ "" . __PACKAGE__ } = \%registration;
}

# Two-letter codes accepted in front of a US ZIP code, grouped by first
# letter. Besides the states, the classes also admit codes such as DC, PR,
# GU and AA, AE, AP. Matching is case-sensitive (upper case only).
my $us_state = qr/(?:
    A[AEKLPRSZ] | C[AOT] | D[CE] | F[LM] | G[AU] | HI | I[ADLN] | K[SY] |
    LA | M[ADEHINOPST] | N[CDEHJMVY] | O[HKR] | P[ARW] | RI | S[CD] |
    T[NX] | UT | V[AIT] | W[AIVY]
)/x;

# One- or two-letter prefixes that may begin a UK postcode (despite the
# variable name these are postcode prefixes, not town names). A trailing
# "?" on a class means the single first letter is valid on its own, as in
# "B1" or "S10". AB is included alongside AL.
my $uk_post_town = qr/(?:
    A[BL] | B[ABDHLNRST]? | C[ABFHMORTVW] | D[ADEGHLNTY] | E[CHNX]? | F[KY] |
    G[LUY]? | H[ADGPRSUX] | I[GMPV] | JE | K[ATWY] | L[ADELNSU]? | M[EKL]? |
    N[EGNPRW]? | O[LX] | P[AEHLOR] | R[GHM] | S[AEGKLMNOPRSTWY]? |
    T[ADFNQRSW] | UB | W[ACDFNRSV]? | YO | ZE
)/x;

# Full UK postcode: prefix, district digits, an optional district letter
# (needed for codes like "EC1A 1BB" or "W1A 0AX"), horizontal whitespace,
# then the inward part. The digit counts are deliberately left as loose as
# they were, so everything matched before is still matched.
my $uk_postcode = qr/$uk_post_town\d{1,3}[A-Z]?[ \t]+\d{1,2}[A-Z][A-Z]/;

# US "state ZIP" pair: a two-letter code, horizontal whitespace, five digits.
my $us_zipcode = qr/$us_state[ \t]+\d{5}/;

# process($class, getbody => CODEREF, ...)
#
# Scans a message body for lines containing a UK postcode or a US
# "state ZIP" pair and returns the block of text leading up to each one.
#
# Arguments after the class name are key/value pairs; only `getbody` is
# read here. It is called with no arguments and its return value is used
# as the body text. An undefined body is treated as empty.
#
# Returns a list of address strings, one per matching line, in body order.
# Each string runs from the line after the closest preceding line that has
# no word characters (at most 10 lines in total) through the matching line,
# joined with "\n", with leading whitespace and quote markers stripped from
# every line. A later match in the same paragraph repeats the earlier lines,
# because a matching line does not itself act as a separator.
sub process {
    my ($class, %hash) = @_;

    my $body = $hash{getbody}->();
    $body = '' unless defined $body;    # avoid splitting undef
    my @lines = split /\n/, $body;

    my @found;

    # Index of the most recent separator line. It starts at -1 ("before the
    # first line") so that an address beginning on the very first body line
    # keeps that line; starting at 0 would silently drop it.
    my $separator = -1;

    for my $idx (0 .. $#lines) {
        if ($lines[$idx] =~ /\b(?:$uk_postcode|$us_zipcode)\b/) {
            # Never reach back more than 10 lines, matching line included.
            $separator = $idx - 10 if $idx - $separator > 10;

            my $address = join "\n", @lines[$separator + 1 .. $idx];

            # Strip leading whitespace and quoting from each line: an
            # optional two-capital "XX>" attribution followed by any run
            # of whitespace, ">" or ":".
            $address =~ s/^ \s* (?:[A-Z][A-Z]>)? [\s>:]+ //mgx;

            push @found, $address;
        }
        elsif ($lines[$idx] !~ /\w/) {
            # No word characters: this line separates paragraphs.
            $separator = $idx;
        }
    }

    return @found;
}

1;