#!/usr/bin/perl
$|++;
my
$VERSION
=
'0.05'
;
Hide Show 15 lines of Pod
use
lib
qw(./lib ../lib)
;
my
$DEBUG
= 0;
my
%defaults
= (
'address'
=>
'data/addresses.txt'
,
'mailrc'
=>
'data/01mailrc.txt'
,
);
my
(
%parsed_map
,
%cpan_map
,
%pause_map
,
%unparsed_map
,
%address_map
,
%domain_map
,
%target_map
,
%author_map
,
%named_map
);
my
(
%result
,
%options
);
my
$parsed
= 0;
init_options();
load_addresses();
check()
if
(
$options
{check});
build()
if
(
$options
{build});
sub
check {
for
my
$name
(
keys
%named_map
) {
my
@rows
=
$options
{source}->get_query(
'hash'
,
"SELECT * FROM testers.profile WHERE name=?"
,
$name
);
if
(
@rows
) {
next
if
(
$rows
[0]->{pause} eq
$named_map
{
$name
});
_log(
"UPDATE testers.profile SET pause='$named_map{$name}' WHERE name='$name'"
);
}
else
{
}
}
for
my
$pause
(
keys
%author_map
) {
my
@rows
=
$options
{source}->get_query(
'hash'
,
"SELECT * FROM testers.profile WHERE pause=?"
,
$pause
);
next
if
(
@rows
);
_log(
"PAUSE missing: $pause => $author_map{$pause}"
);
}
for
my
$address
(
keys
%parsed_map
) {
my
@rows
=
$options
{source}->get_query(
'hash'
,
"SELECT * FROM testers.address a LEFT JOIN testers.profile p ON p.testerid=a.testerid WHERE a.address=?"
,
$address
);
if
(
@rows
) {
if
(
$rows
[0]->{name}) {
my
$name
=
$rows
[0]->{name} . (
$rows
[0]->{pause} ?
" ($rows[0]->{pause})"
:
''
);
next
if
(
$parsed_map
{
$address
} eq
$name
);
_log(
"NAME MAP missing: $name => $address => $parsed_map{$address}"
);
}
else
{
my
(
$name
,
$pause
) =
$parsed_map
{
$address
} =~ /^(.*?)(?:(?:\s+\((\w+)\))|$)/;
next
if
(
$name
=~ /\@/);
if
(
$pause
) {
my
@pause
=
$options
{source}->get_query(
'hash'
,
"SELECT * FROM testers.profile WHERE pause=?"
,
$pause
);
if
(
@pause
) {
$options
{source}->do_query(
"UPDATE testers.address SET testerid=? WHERE addressid=?"
,
$pause
[0]->{testerid},
$rows
[0]->{addressid});
_log(
"-- UPDATE testers.address SET testerid=$pause[0]->{testerid} WHERE addressid=$rows[0]->{addressid};"
);
}
else
{
@pause
=
$options
{source}->get_query(
'hash'
,
"SELECT * FROM testers.profile WHERE name=?"
,
$name
);
if
(
@pause
) {
$options
{source}->do_query(
"UPDATE testers.address SET testerid=? WHERE addressid=?"
,
$pause
[0]->{testerid},
$rows
[0]->{addressid});
_log(
"-- UPDATE testers.address SET testerid=$pause[0]->{testerid} WHERE addressid=$rows[0]->{addressid};"
);
}
else
{
_log(
"INSERT testers.profile SET name='$name', pause='$pause';"
);
}
}
}
else
{
my
@pause
=
$options
{source}->get_query(
'hash'
,
"SELECT * FROM testers.profile WHERE name=?"
,
$name
);
if
(
@pause
) {
$options
{source}->do_query(
"UPDATE testers.address SET testerid=? WHERE addressid=?"
,
$pause
[0]->{testerid},
$rows
[0]->{addressid});
_log(
"-- UPDATE testers.address SET testerid=$pause[0]->{testerid} WHERE addressid=$rows[0]->{addressid};"
);
}
else
{
_log(
"INSERT testers.profile SET name='$name';"
);
}
}
}
}
else
{
@rows
=
$options
{source}->get_query(
'hash'
,
"SELECT * FROM cpanstats WHERE tester = ?"
,
$address
);
if
(
@rows
) {
}
else
{
}
}
}
}
sub
build {
my
$next
;
if
(
$options
{max}) {
my
@rows
=
$options
{source}->get_query(
'array'
,
"SELECT MAX(id) FROM testers.ixreport"
);
$options
{from} =
$rows
[0]->[0]
if
(
@rows
);
}
if
(
$options
{from}) {
$next
=
$options
{source}->iterator(
'hash'
,
"SELECT * FROM cpanstats WHERE type=2 AND id >= $options{from} ORDER BY id"
);
}
else
{
$next
=
$options
{source}->iterator(
'hash'
,
"SELECT * FROM cpanstats WHERE type=2 ORDER BY id"
);
}
while
(
my
$row
=
$next
->()) {
my
(
$testerid
,
$addressid
,
$email
);
my
@address
=
$options
{source}->get_query(
'hash'
,
'SELECT * FROM testers.address WHERE address=?'
,
$row
->{tester});
if
(
$address
[0]) {
$testerid
=
$address
[0]->{testerid};
$addressid
=
$address
[0]->{addressid};
$email
=
$address
[0]->{email};
}
else
{
$email
= extract_email(
$row
->{tester});
$addressid
=
$options
{source}->id_query(
'INSERT INTO testers.address SET testerid=0,address=?,email=?'
,
$row
->{tester},
$email
);
_log(
"Creating address entry: $row->{tester},$email,$addressid"
);
$testerid
= 0;
}
my
@report
=
$options
{source}->get_query(
'hash'
,
'SELECT * FROM testers.ixreport WHERE id=?'
,
$row
->{id});
if
(
$report
[0]) {
_log(
"Updating report index: $report[0]->{id},$row->{tester},$email,$addressid"
)
if
(
$options
{verbose});
$options
{source}->do_query(
'UPDATE testers.ixreport SET guid=?,fulldate=?,addressid=? WHERE id=?'
,
$row
->{guid},
$row
->{fulldate},
$addressid
,
$report
[0]->{id});
}
else
{
_log(
"Creating report index: $row->{id},$row->{tester},$email,$addressid"
)
if
(
$options
{verbose});
$options
{source}->do_query(
'INSERT INTO testers.ixreport SET id=?,guid=?,fulldate=?,addressid=?'
,
$row
->{id},
$row
->{guid},
$row
->{fulldate},
$addressid
);
}
my
$target
=
$parsed_map
{
$row
->{tester}};
$target
=
$address_map
{
$email
}
unless
(
$target
);
$target
=
$cpan_map
{
$email
}
unless
(
$target
);
unless
(
$target
) {
my
@rows
=
$options
{source}->get_query(
'hash'
,
'SELECT fullname FROM metabase.testers_email WHERE email=? or email=?'
,
$email
,
$row
->{tester});
$target
=
$rows
[0]->{fullname}
if
(
@rows
);
}
if
(
$target
) {
my
(
$name
,
$pause
) =
$target
=~ /^(.*?)(?:(?:\s+\((\w+)\))|$)/;
my
$profile
;
if
(
$pause
) {
my
@rows
=
$options
{source}->get_query(
'hash'
,
'SELECT * FROM testers.profile WHERE pause=?'
,
$pause
);
$profile
=
$rows
[0]
if
(
@rows
);
}
if
(!
$profile
&&
$name
) {
my
@rows
=
$options
{source}->get_query(
'hash'
,
'SELECT * FROM testers.profile WHERE name=?'
,
$name
);
$profile
=
$rows
[0]
if
(
@rows
);
}
if
(
$profile
) {
if
(
$testerid
!=
$profile
->{testerid}) {
_log(
"Updating address entry from profile: $row->{tester},$email,$addressid,$profile->{testerid},$name,$pause"
)
if
(
$options
{verbose});
$options
{source}->do_query(
'UPDATE testers.address SET testerid=? WHERE addressid=?'
,
$profile
->{testerid},
$addressid
);
}
}
elsif
(
$name
) {
_log(
"Creating profile: $row->{tester},$email,$addressid,-,$name,$pause"
);
my
$id
=
$options
{source}->id_query(
'INSERT INTO testers.profile SET name=?,pause=?'
,
$name
,
$pause
);
$options
{source}->do_query(
'UPDATE testers.address SET testerid=? WHERE addressid=?'
,
$id
,
$addressid
);
_log(
"Updating address entry from profile: $row->{tester},$email,$addressid,$profile->{testerid},$name,$pause"
)
if
(
$options
{verbose});
}
}
else
{
_log(
"No target found: $row->{tester},$email,$addressid"
);
}
}
}
sub
load_addresses {
my
$fh
= IO::File->new(
$options
{address}) or
die
"Cannot open address file [$options{address}]: $!"
;
while
(<
$fh
>) {
s/\s+$//;
next
if
(/^$/);
my
(
$source
,
$target
) = (/(.*),(.*)/);
next
unless
(
$source
&&
$target
);
$parsed_map
{
$source
} =
$target
;
my
$email
= extract_email(
$source
);
next
unless
(
$email
);
my
(
$local
,
$domain
) =
split
(/\@/,
$email
);
$address_map
{
$email
} =
$target
;
$domain_map
{
$domain
} =
$target
;
$target_map
{
$target
} =
$email
;
my
(
$author
) = (
$target
=~ /\(([A-Z0-9]+)\)/);
$author_map
{
$author
} =
$email
if
(
$author
);
}
$fh
->
close
;
if
(
$options
{verbose}) {
_log(
"parsed entries = "
.
scalar
(
keys
%parsed_map
));
_log(
"address entries = "
.
scalar
(
keys
%address_map
));
_log(
"domain entries = "
.
scalar
(
keys
%domain_map
));
}
$fh
= IO::File->new(
$options
{mailrc}) or
die
"Cannot open mailrc file [$options{mailrc}]: $!"
;
while
(<
$fh
>) {
s/\s+$//;
next
if
(/^$/);
my
(
$alias
,
$name
,
$email
) = (/alias\s+([A-Z]+)\s+
"([^<]+) <([^>]+)>"
/);
next
unless
(
$alias
);
$named_map
{
$name
} =
"$alias"
;
$pause_map
{
lc
(
$alias
)} =
"$name ($alias)"
;
$cpan_map
{
lc
(
$email
)} =
"$name ($alias)"
;
}
$fh
->
close
;
if
(
$options
{verbose}) {
_log(
"pause entries = "
.
scalar
(
keys
%pause_map
));
_log(
"cpan entries = "
.
scalar
(
keys
%cpan_map
));
}
}
sub
extract_email {
my
$address
=
shift
;
my
(
$email
) =
$address
=~ /([-+=\w]+\@(?:[-\w]+\.)+(?:[a-z]{2,}))/i;
return
lc
$email
;
}
sub
init_options {
GetOptions( \
%options
,
'config=s'
,
'build'
,
'check'
,
'max'
,
'verbose'
,
'help|h'
,
'version|V'
);
_help(1)
if
(
$options
{help});
_help(0)
if
(
$options
{version});
die
"Configuration file [$options{config}] not found\n"
unless
(-f
$options
{config});
my
$cfg
= Config::IniFiles->new(
-file
=>
$options
{config} );
my
%opts
=
map
{
$_
=>
$cfg
->val(
'CPANSTATS'
,
$_
);}
qw(driver database dbfile dbhost dbport dbuser dbpass)
;
$options
{source} = CPAN::Testers::Common::DBUtils->new(
%opts
);
die
"Cannot configure SOURCE database\n"
unless
(
$options
{source});
for
my
$opt
(
qw(address mailrc verbose logfile logclean)
) {
$options
{
$opt
} ||=
$cfg
->val(
'MASTER'
,
$opt
) ||
$defaults
{
$opt
};
}
}
sub
_help {
my
$full
=
shift
;
if
(
$full
) {
print
<<HERE;
Usage: $0 \\
[-config=<file>] [--build [--max | --from=<id>]] [--check] [-h] [-V]
--config=<file> database configuration file
--build build testers database
--max build from the last id
--from=<id> build from a specific id
--check checks whether the loaded data has been saved
-h this help screen
-V program version
HERE
}
print
"$0 v$VERSION\n"
;
exit
(0);
}
sub
_log {
return
unless
(
$options
{logfile});
my
$mode
=
$options
{logclean} ?
'w+'
:
'a+'
;
my
$log
= IO::File->new(
$options
{logfile},
$mode
) or
die
"Cannot open file [$options{logfile}]: $!\n"
;
$options
{logclean} = 0;
my
$ts
= DateTime->now->datetime();
print
$log
join
(
' '
,
$ts
,
@_
) .
"\n"
;
$log
->
close
;
}
Hide Show 34 lines of Pod