use
constant
MAGIC_RE
=>
qr/^\015\001\007\011\003/
;
@ISA
@DBNAMES @DB_EXTENSIONS
$NSPAM_MAGIC_TOKEN $NHAM_MAGIC_TOKEN $LAST_EXPIRE_MAGIC_TOKEN $LAST_JOURNAL_SYNC_MAGIC_TOKEN
$NTOKENS_MAGIC_TOKEN $OLDEST_TOKEN_AGE_MAGIC_TOKEN $LAST_EXPIRE_REDUCE_MAGIC_TOKEN
$RUNNING_EXPIRE_MAGIC_TOKEN $DB_VERSION_MAGIC_TOKEN $LAST_ATIME_DELTA_MAGIC_TOKEN
$NEWEST_TOKEN_AGE_MAGIC_TOKEN
}
;
# Inherit from the generic Bayes storage base class.
@ISA
=
qw( Mail::SpamAssassin::BayesStore )
;
# Logical databases handled by this store; each one is tied into
# $self->{"db_<name>"} and lives in a file named "<bayes_path>_<name>".
@DBNAMES
=
qw(toks seen)
;
# Filename suffixes that are probed when looking for database files on disk.
@DB_EXTENSIONS
= (
''
,
'.db'
);
# Keys of the bookkeeping ("magic") entries stored inside the toks database.
# They all start with the same five control bytes, which is the prefix that
# MAGIC_RE matches, so they can be told apart from real tokens.
$DB_VERSION_MAGIC_TOKEN
=
"\015\001\007\011\003DBVERSION"
;
$LAST_ATIME_DELTA_MAGIC_TOKEN
=
"\015\001\007\011\003LASTATIMEDELTA"
;
$LAST_EXPIRE_MAGIC_TOKEN
=
"\015\001\007\011\003LASTEXPIRE"
;
$LAST_EXPIRE_REDUCE_MAGIC_TOKEN
=
"\015\001\007\011\003LASTEXPIREREDUCE"
;
$LAST_JOURNAL_SYNC_MAGIC_TOKEN
=
"\015\001\007\011\003LASTJOURNALSYNC"
;
$NEWEST_TOKEN_AGE_MAGIC_TOKEN
=
"\015\001\007\011\003NEWESTAGE"
;
$NHAM_MAGIC_TOKEN
=
"\015\001\007\011\003NHAM"
;
$NSPAM_MAGIC_TOKEN
=
"\015\001\007\011\003NSPAM"
;
$NTOKENS_MAGIC_TOKEN
=
"\015\001\007\011\003NTOKENS"
;
$OLDEST_TOKEN_AGE_MAGIC_TOKEN
=
"\015\001\007\011\003OLDESTAGE"
;
$RUNNING_EXPIRE_MAGIC_TOKEN
=
"\015\001\007\011\003RUNNINGEXPIRE"
;
# Constructor.  Delegates object creation to the parent class and then
# initialises the state this backend tracks: the newest database format it
# supports, whether the databases are tied, whether the lock is held, and
# the buffer of pending journal lines.
#
# May be called on a class name or on an existing object.
sub new {
    my $proto = shift;
    my $class = ref($proto) || $proto;

    # Whatever arguments remain are passed through to the parent untouched.
    my $self = $class->SUPER::new(@_);

    @{$self}{qw(supported_db_version already_tied is_locked string_to_journal)}
        = (3, 0, 0, '');

    return $self;
}
# Tie every database named in @DBNAMES read-only.
#
# Returns 1 when the databases are tied (or were already tied without the
# write lock being held) and 0 on any failure: DB_File unavailable,
# bayes_path not configured, no toks file on disk, a tie() failure, or an
# on-disk format version that differs from the one this code expects.
sub tie_db_readonly {
    my ($self) = @_;

    if (!HAS_DB_FILE) {
        dbg("bayes: DB_File module not installed, cannot use Bayes");
        return 0;
    }

    # Already tied and not holding the write lock: nothing to do.
    return 1 if ($self->{already_tied} && $self->{is_locked} == 0);

    my $main = $self->{bayes}->{main};
    if (!defined($main->{conf}->{bayes_path})) {
        dbg("bayes_path not defined");
        return 0;
    }

    $self->read_db_configs();

    my $path = $main->sed_path($main->{conf}->{bayes_path});

    # A read-only tie must never create files, so insist that a toks
    # database exists under one of the known filename extensions.
    my $found = 0;
    for my $ext (@DB_EXTENSIONS) {
        if (-f "${path}_toks${ext}") {
            $found = 1;
            last;
        }
    }
    if (!$found) {
        dbg("bayes: no dbs present, cannot tie DB R/O: ${path}_toks");
        return 0;
    }

    foreach my $dbname (@DBNAMES) {
        my $name   = $path . '_' . $dbname;
        my $db_var = 'db_' . $dbname;
        dbg("bayes: $$ tie-ing to DB file R/O $name");

        next if tie(%{ $self->{$db_var} }, "DB_File", $name, O_RDONLY,
                    (oct($main->{conf}->{bayes_file_mode}) & 0666));

        # tie() failed: report it while $! is still fresh, then release
        # whatever was tied before the failure.
        warn "Cannot open bayes databases ${path}_* R/O: tie failed: $!\n";
        foreach my $tied_name (@DBNAMES) {
            my $tied_var = 'db_' . $tied_name;
            next unless exists $self->{$tied_var};
            dbg("bayes: $$ untie-ing DB file $tied_name");
            untie %{ $self->{$tied_var} };
        }
        return 0;
    }

    # Cache the on-disk format version (element 6 of the storage variables).
    $self->{db_version} = ($self->get_storage_variables())[6];
    dbg("bayes: found bayes db version " . $self->{db_version});

    # A read-only handle cannot upgrade the database, so any version other
    # than the current one is unusable.
    if ($self->_check_db_version() != 0) {
        warn("bayes: bayes db version " . $self->{db_version} . " is not able to be used, aborting!");

        # untie_db() returns immediately unless already_tied is set, which
        # would leave the hashes tied above attached to the files.  Mark
        # them as tied first so they are really released; untie_db() resets
        # the flag to 0 again.
        $self->{already_tied} = 1;
        $self->untie_db();
        return 0;
    }

    $self->{already_tied} = 1;
    return 1;
}
# Tie every database named in @DBNAMES read-write, creating the files (and
# the parent directory) when they do not exist yet, and take the lock on
# the database path first.
#
# Returns 1 on success.  Returns 0 when DB_File is unavailable, bayes_path
# is not configured, the lock cannot be obtained, a tie() fails, or an
# existing database cannot be upgraded to the current format.  On every
# failure after the lock was taken, the lock is released again.
sub tie_db_writable {
    my ($self) = @_;

    if (!HAS_DB_FILE) {
        dbg("bayes: DB_File module not installed, cannot use Bayes");
        return 0;
    }

    my $main = $self->{bayes}->{main};

    # Already tied with the lock held: just keep the lock fresh.
    if ($self->{already_tied} && $self->{is_locked} == 1) {
        $main->{locker}->refresh_lock($self->{locked_file});
        return 1;
    }

    if (!defined($main->{conf}->{bayes_path})) {
        dbg("bayes_path not defined");
        return 0;
    }

    $self->read_db_configs();

    my $path = $main->sed_path($main->{conf}->{bayes_path});

    # Remember whether a toks database existed before we (possibly) create
    # one; a pre-existing database may need a format upgrade, a new one
    # needs its bookkeeping entries initialised.
    my $found = 0;
    for my $ext (@DB_EXTENSIONS) {
        if (-f "${path}_toks${ext}") {
            $found = 1;
            last;
        }
    }

    # Best effort: a failure to create the directory shows up as a lock or
    # tie failure below.
    my $parentdir = dirname($path);
    if (!-d $parentdir) {
        eval {
            mkpath($parentdir, 0, (oct($main->{conf}->{bayes_file_mode}) & 0777));
        };
    }

    my $tout = $main->{learn_wait_for_lock} ? 300 : 10;

    if ($main->{locker}->safe_lock($path, $tout)) {
        $self->{locked_file} = $path;
        $self->{is_locked}   = 1;
    }
    else {
        warn "Cannot open bayes databases ${path}_* R/W: lock failed: $!\n";
        return 0;
    }

    # Clear the umask so the configured bayes_file_mode is applied as-is to
    # any file created by tie().
    my $umask = umask 0;
    foreach my $dbname (@DBNAMES) {
        my $name   = $path . '_' . $dbname;
        my $db_var = 'db_' . $dbname;
        dbg("bayes: $$ tie-ing to DB file R/W $name");

        next if tie(%{ $self->{$db_var} }, "DB_File", $name, O_RDWR|O_CREAT,
                    (oct($main->{conf}->{bayes_file_mode}) & 0666));

        # tie() failed: save the error before anything can overwrite $!,
        # then undo the umask change, the partial ties and the lock.
        my $err = $!;
        umask $umask;

        foreach my $tied_name (@DBNAMES) {
            my $tied_var = 'db_' . $tied_name;
            next unless exists $self->{$tied_var};
            dbg("bayes: $$ untie-ing DB file $tied_name");
            untie %{ $self->{$tied_var} };
        }

        if ($self->{is_locked}) {
            $self->{bayes}->{main}->{locker}->safe_unlock($self->{locked_file});
            $self->{is_locked} = 0;
        }

        warn "Cannot open bayes databases ${path}_* R/W: tie failed: $err\n";
        return 0;
    }
    umask $umask;

    # Cache the on-disk format version (element 6 of the storage variables).
    $self->{db_version} = ($self->get_storage_variables())[6];
    dbg("bayes: found bayes db version " . $self->{db_version});

    if ($found && !$self->_upgrade_db()) {
        # untie_db() returns immediately unless already_tied is set, which
        # would leave the databases tied and, worse, the lock file held.
        # Mark the store as tied so the cleanup really happens; untie_db()
        # resets the flag to 0 again.
        $self->{already_tied} = 1;
        $self->untie_db();
        return 0;
    }
    elsif (!$found) {
        # Brand-new database: record the current format and an empty count.
        $self->{db_version} = $self->{db_toks}->{$DB_VERSION_MAGIC_TOKEN} = $self->DB_VERSION;
        $self->{db_toks}->{$NTOKENS_MAGIC_TOKEN} = 0;
        dbg("bayes: new db, set db version " . $self->{db_version} . " and 0 tokens");
    }

    $self->{already_tied} = 1;
    return 1;
}
# Compare the cached on-disk format version with the version this code
# implements.  Returns -1 when the database is older, 0 when it matches
# and 1 when it is newer.
sub _check_db_version {
    my $self = shift;

    my $on_disk = $self->{db_version};
    return $on_disk <=> $self->DB_VERSION;
}
# Bring an existing toks database up to the current format, one step at a
# time (anything below v2 -> v2, then v2 -> v3).  Each step writes a fresh
# "<toks>.new" database, swaps it over the old file and re-ties
# $self->{db_toks} to the result.
#
# Returns 1 when the database is (now) at the expected version, 0 when the
# database is newer than this code understands or any step fails.
sub _upgrade_db {
    my ($self) = @_;

    my $cmp         = $self->_check_db_version();
    my $tied_obj    = 0;
    my $saved_umask;

    # Already current: nothing to do.
    return 1 if ($cmp == 0);

    if ($cmp == 1) {
        warn("bayes: bayes db version " . $self->{db_version} . " is newer than we understand, aborting!");
        return 0;
    }

    dbg("bayes: detected bayes db format " . $self->{db_version} . ", upgrading");

    my $main      = $self->{bayes}->{main};
    my $path      = $main->sed_path($main->{conf}->{bayes_path});
    my $toks_file = $path . '_toks';

    # A journal left behind by the old format is discarded.
    my $journal = $self->_get_journal_filename();
    if (-f $journal) {
        dbg("bayes: old journal file found, removing.");
        unlink($journal) or warn "Couldn't remove $journal: $!";
    }

    # ---- step 1: v0 / v1 -> v2 ------------------------------------------
    if ($self->{db_version} < 2) {
        dbg("bayes: upgrading database format from v" . $self->{db_version} . " to v2");
        $self->set_running_expire_tok();

        # Names under which the old format stored its counters.
        my ($old_nspam, $old_nham, $old_ntokens) =
            ($self->{db_version} == 0)
            ? ('**NSPAM', '**NHAM', '**NTOKENS')
            : ("\015\001\007\011\003NSPAM",
               "\015\001\007\011\003NHAM",
               "\015\001\007\011\003NTOKENS");

        my $started  = time;
        my $newatime = $started;

        my %new_toks;
        $saved_umask = umask 0;
        $tied_obj = tie(%new_toks, "DB_File", "${toks_file}.new",
                        O_RDWR|O_CREAT|O_EXCL,
                        (oct($main->{conf}->{bayes_file_mode}) & 0666));
        umask $saved_umask;
        return 0 unless $tied_obj;
        undef $tied_obj;    # drop our reference to the tie object before untie

        # Carry the counters over; every timestamp starts from "now".
        $new_toks{$NSPAM_MAGIC_TOKEN}      = $self->{db_toks}->{$old_nspam};
        $new_toks{$NHAM_MAGIC_TOKEN}       = $self->{db_toks}->{$old_nham};
        $new_toks{$NTOKENS_MAGIC_TOKEN}    = $self->{db_toks}->{$old_ntokens};
        $new_toks{$DB_VERSION_MAGIC_TOKEN} = 2;
        for my $stamp_key ($OLDEST_TOKEN_AGE_MAGIC_TOKEN,
                           $LAST_EXPIRE_MAGIC_TOKEN,
                           $NEWEST_TOKEN_AGE_MAGIC_TOKEN,
                           $LAST_JOURNAL_SYNC_MAGIC_TOKEN) {
            $new_toks{$stamp_key} = $newatime;
        }
        for my $zero_key ($LAST_ATIME_DELTA_MAGIC_TOKEN,
                          $LAST_EXPIRE_REDUCE_MAGIC_TOKEN) {
            $new_toks{$zero_key} = 0;
        }

        # Re-pack every real token with the new access time, skipping the
        # bookkeeping entries of both old formats.
        my $count = 0;
        while (my ($tok, $packed) = each %{ $self->{db_toks} }) {
            next if ($tok =~ /^(?:\*\*[A-Z]+$|\015\001\007\011\003)/);

            my ($ts, $th) = $self->tok_unpack($packed);
            $new_toks{$tok} = $self->tok_pack($ts, $th, $newatime);

            # Every 1000 tokens, signal that the upgrade is still running.
            $self->set_running_expire_tok() if (($count++ % 1000) == 0);
        }

        untie %{ $self->{db_toks} };
        untie %new_toks;

        # Ignore signals for the rest of this step, while files are swapped.
        local $SIG{'INT'}  = 'IGNORE';
        local $SIG{'TERM'} = 'IGNORE';
        local $SIG{'HUP'}  = 'IGNORE' if (!Mail::SpamAssassin::Util::am_running_on_windows());

        # The separate message-count file is not carried over to v2.
        my $msgcount_file = $path . '_msgcount';
        if (-f $msgcount_file) {
            dbg("bayes: old msgcount file found, removing.");
            unlink($msgcount_file) or warn "Couldn't remove $msgcount_file: $!";
        }

        # Move the new database over the old one, whatever extension the
        # DB library gave it.
        for my $ext (@DB_EXTENSIONS) {
            my $newf = $toks_file . '.new' . $ext;
            my $oldf = $toks_file . $ext;
            next unless (-f $newf);

            unless (rename($newf, $oldf)) {
                warn "rename $newf to $oldf failed: $!\n";
                return 0;
            }
        }

        # Re-attach to the upgraded file.
        $saved_umask = umask 0;
        $tied_obj = tie(%{ $self->{db_toks} }, "DB_File", $toks_file,
                        O_RDWR|O_CREAT,
                        (oct($main->{conf}->{bayes_file_mode}) & 0666));
        umask $saved_umask;
        return 0 unless $tied_obj;
        undef $tied_obj;

        dbg("bayes: upgraded database format from v" . $self->{db_version} . " to v2 in " . (time() - $started) . " seconds");
        $self->{db_version} = 2;
    }

    # ---- step 2: v2 -> v3 -----------------------------------------------
    if ($self->{db_version} == 2) {
        dbg("bayes: upgrading database format from v" . $self->{db_version} . " to v3");
        $self->set_running_expire_tok();

        # [ key in the new database, key in the v2 database ]
        my @counters = (
            [ $NSPAM_MAGIC_TOKEN,   "\015\001\007\011\003NSPAM"   ],
            [ $NHAM_MAGIC_TOKEN,    "\015\001\007\011\003NHAM"    ],
            [ $NTOKENS_MAGIC_TOKEN, "\015\001\007\011\003NTOKENS" ],
        );
        my @stamps = (
            [ $OLDEST_TOKEN_AGE_MAGIC_TOKEN,   "\015\001\007\011\003OLDESTAGE"        ],
            [ $LAST_EXPIRE_MAGIC_TOKEN,        "\015\001\007\011\003LASTEXPIRE"       ],
            [ $NEWEST_TOKEN_AGE_MAGIC_TOKEN,   "\015\001\007\011\003NEWESTAGE"        ],
            [ $LAST_JOURNAL_SYNC_MAGIC_TOKEN,  "\015\001\007\011\003LASTJOURNALSYNC"  ],
            [ $LAST_ATIME_DELTA_MAGIC_TOKEN,   "\015\001\007\011\003LASTATIMEDELTA"   ],
            [ $LAST_EXPIRE_REDUCE_MAGIC_TOKEN, "\015\001\007\011\003LASTEXPIREREDUCE" ],
        );

        my $started = time;

        my %new_toks;
        $saved_umask = umask 0;
        $tied_obj = tie(%new_toks, "DB_File", "${toks_file}.new",
                        O_RDWR|O_CREAT|O_EXCL,
                        (oct($main->{conf}->{bayes_file_mode}) & 0666));
        umask $saved_umask;
        return 0 unless $tied_obj;
        undef $tied_obj;

        # All bookkeeping values are copied unchanged, except the version.
        for my $pair (@counters) {
            my ($new_key, $old_key) = @{$pair};
            $new_toks{$new_key} = $self->{db_toks}->{$old_key};
        }
        $new_toks{$DB_VERSION_MAGIC_TOKEN} = 3;
        for my $pair (@stamps) {
            my ($new_key, $old_key) = @{$pair};
            $new_toks{$new_key} = $self->{db_toks}->{$old_key};
        }

        # In v3 a token is keyed by the last five bytes of its SHA-1 digest;
        # the packed counts are carried over as they are.
        my $count = 0;
        while (my ($tok, $packed) = each %{ $self->{db_toks} }) {
            next if ($tok =~ /^\015\001\007\011\003/);

            my $hashed = substr(sha1($tok), -5);
            $new_toks{$hashed} = $packed;

            $self->set_running_expire_tok() if (($count++ % 1000) == 0);
        }

        untie %{ $self->{db_toks} };
        untie %new_toks;

        # Ignore signals for the rest of this step, while files are swapped.
        local $SIG{'INT'}  = 'IGNORE';
        local $SIG{'TERM'} = 'IGNORE';
        local $SIG{'HUP'}  = 'IGNORE' if (!Mail::SpamAssassin::Util::am_running_on_windows());

        for my $ext (@DB_EXTENSIONS) {
            my $newf = $toks_file . '.new' . $ext;
            my $oldf = $toks_file . $ext;
            next unless (-f $newf);

            unless (rename($newf, $oldf)) {
                warn "rename $newf to $oldf failed: $!\n";
                return 0;
            }
        }

        $saved_umask = umask 0;
        $tied_obj = tie(%{ $self->{db_toks} }, "DB_File", $toks_file,
                        O_RDWR|O_CREAT,
                        (oct($main->{conf}->{bayes_file_mode}) & 0666));
        umask $saved_umask;
        return 0 unless $tied_obj;
        undef $tied_obj;

        dbg("bayes: upgraded database format from v" . $self->{db_version} . " to v3 in " . (time() - $started) . " seconds");
        $self->{db_version} = 3;
    }

    return 1;
}
# Release every tied database and, when held, the lock.  Does nothing at
# all unless the store is marked as tied.  Afterwards the store is marked
# untied and the cached database version is forgotten.
sub untie_db {
    my ($self) = @_;

    return unless $self->{already_tied};

    dbg("bayes: $$ untie-ing");

    for my $db_var (map { 'db_' . $_ } @DBNAMES) {
        next unless exists $self->{$db_var};

        dbg("bayes: $$ untie-ing $db_var");
        untie %{ $self->{$db_var} };
        delete $self->{$db_var};
    }

    if ($self->{is_locked}) {
        dbg("bayes: files locked, now unlocking lock");
        my $locker = $self->{bayes}->{main}->{locker};
        $locker->safe_unlock($self->{locked_file});
        $self->{is_locked} = 0;
    }

    $self->{already_tied} = 0;
    $self->{db_version}   = undef;
}
# Build a histogram of how many tokens would be old enough to expire for
# each candidate age threshold.
#
# The thresholds are $start * 1, $start * 2, $start * 4, ... for every
# power-of-two multiplier up to $max_expire_mult.  A token's age is
# $newest_atime minus its access time.  Returns a hash (as a list) mapping
# multiplier => number of tokens at least that old.
sub calculate_expire_delta {
    my ($self, $newest_atime, $start, $max_expire_mult) = @_;

    my %delta;

    while (my ($tok, $packed) = each %{ $self->{db_toks} }) {
        # Bookkeeping entries are not tokens.
        next if ($tok =~ MAGIC_RE);

        my (undef, undef, $atime) = $self->tok_unpack($packed);
        my $age = $newest_atime - $atime;

        # Thresholds only grow, so stop at the first one the token misses.
        my $mult = 1;
        while ($mult <= $max_expire_mult && $age >= $start * $mult) {
            $delta{$mult}++;
            $mult <<= 1;
        }
    }

    return %delta;
}
# Expire old tokens by copying the survivors into a temporary database and
# swapping it over the live toks file.
#
# Arguments: $opts (hash ref; 'showdots' prints progress dots to STDERR),
# $newdelta (maximum token age to keep, relative to the newest access time
# in $vars[10]), and @vars (the list returned by get_storage_variables).
#
# Returns ($kept, $deleted, $num_hapaxes, $num_lowfreq).  When the expiry
# is abandoned -- fewer than 100000 tokens would remain, or the temporary
# database cannot be created -- the live database keeps its tokens and the
# result is ($vars[3], 0, 0, 0).  The databases are untied before returning
# in every case.
sub token_expiration {
    my ($self, $opts, $newdelta, @vars) = @_;

    my ($deleted, $kept, $num_hapaxes, $num_lowfreq) = (0, 0, 0, 0);

    my $main = $self->{bayes}->{main};
    my $path = $main->sed_path($main->{conf}->{bayes_path});

    # Per-process temporary database name; clear out any stale leftover so
    # the O_EXCL create below can succeed.
    my $tmpsuffix = "expire$$";
    my $tmpdbname = $path . '_toks.' . $tmpsuffix;
    for my $ext (@DB_EXTENSIONS) {
        unlink($tmpdbname . $ext);
    }

    my %new_toks;
    my $umask = umask 0;
    my $tied  = tie(%new_toks, "DB_File", $tmpdbname, O_RDWR|O_CREAT|O_EXCL,
                    (oct($main->{conf}->{bayes_file_mode}) & 0666));
    my $tie_err = $!;
    umask $umask;

    # Without this check a failed tie leaves %new_toks as an ordinary
    # in-memory hash: the whole database would be copied into memory, no
    # file would ever be swapped in, and tokens would be reported as
    # deleted although the live database was untouched.
    if (!$tied) {
        warn "bayes: cannot create temporary expiry database $tmpdbname: $tie_err\n";
        $self->untie_db();
        return ($vars[3], 0, 0, 0);
    }
    undef $tied;    # drop our reference to the tie object before untie

    my $oldest;

    my $showdots = $opts->{showdots};
    if ($showdots) { print STDERR "\n"; }

    $new_toks{$LAST_ATIME_DELTA_MAGIC_TOKEN} = $newdelta;

    # Anything last touched before this point in time is dropped.
    my $too_old = $vars[10] - $newdelta;

    while (my ($tok, $packed) = each %{ $self->{db_toks} }) {
        next if ($tok =~ MAGIC_RE);

        my ($ts, $th, $atime) = $self->tok_unpack($packed);

        if ($atime < $too_old) {
            $deleted++;
        }
        else {
            # An access time newer than the recorded newest is clamped.
            if ($atime > $vars[10]) {
                $atime = $vars[10];
            }

            $new_toks{$tok} = $self->tok_pack($ts, $th, $atime);
            $kept++;

            if (!defined($oldest) || $atime < $oldest) { $oldest = $atime; }

            if ($ts + $th == 1) {
                $num_hapaxes++;
            }
            elsif ($ts < 8 && $th < 8) {
                $num_lowfreq++;
            }
        }

        # Every 1000 tokens: progress dot and a refreshed running marker.
        if ((($kept + $deleted) % 1000) == 0) {
            if ($showdots) { print STDERR "."; }
            $self->set_running_expire_tok();
        }
    }

    # Bookkeeping for the new database.
    $new_toks{$DB_VERSION_MAGIC_TOKEN}         = $self->DB_VERSION;
    $new_toks{$NSPAM_MAGIC_TOKEN}              = $vars[1];
    $new_toks{$NHAM_MAGIC_TOKEN}               = $vars[2];
    $new_toks{$NEWEST_TOKEN_AGE_MAGIC_TOKEN}   = $vars[10];
    $new_toks{$NTOKENS_MAGIC_TOKEN}            = $kept;
    $new_toks{$LAST_EXPIRE_MAGIC_TOKEN}        = time();
    $new_toks{$OLDEST_TOKEN_AGE_MAGIC_TOKEN}   = $oldest;
    $new_toks{$LAST_EXPIRE_REDUCE_MAGIC_TOKEN} = $deleted;

    if ($kept < 100000) {
        # Sanity check failed: throw the new database away and only record
        # in the live one that an expiry was attempted.
        dbg("bayes: Token Expiration would expire too many tokens, aborting.");

        $self->{db_toks}->{$LAST_EXPIRE_MAGIC_TOKEN}        = time();
        $self->{db_toks}->{$LAST_EXPIRE_REDUCE_MAGIC_TOKEN} = 0;
        $self->{db_toks}->{$LAST_ATIME_DELTA_MAGIC_TOKEN}   = 0;

        untie %new_toks;
        for my $ext (@DB_EXTENSIONS) {
            unlink($tmpdbname . $ext);
        }

        $kept        = $vars[3];
        $deleted     = 0;
        $num_hapaxes = 0;
        $num_lowfreq = 0;
    }
    else {
        untie %{ $self->{db_toks} };
        untie %new_toks;

        # Ignore signals while the new file replaces the old one.
        {
            local $SIG{'INT'}  = 'IGNORE';
            local $SIG{'TERM'} = 'IGNORE';
            local $SIG{'HUP'}  = 'IGNORE' if (!Mail::SpamAssassin::Util::am_running_on_windows());

            for my $ext (@DB_EXTENSIONS) {
                my $newf = $tmpdbname . $ext;
                my $oldf = $path . '_toks' . $ext;

                next unless (-f $newf);
                if (!rename($newf, $oldf)) {
                    warn "rename $newf to $oldf failed: $!\n";
                }
            }
        }
    }

    $self->untie_db();

    return ($kept, $deleted, $num_hapaxes, $num_lowfreq);
}
# Decide whether the journal should be merged into the database now.
#
# Returns 1 when a journal file exists and is either larger than
# bayes_journal_max_size or the last recorded sync is more than a day
# (86400 seconds) old.  Returns 0 otherwise, including when the database
# is older than the current format or bayes_journal_max_size is 0.
sub sync_due {
    my ($self) = @_;

    # Never sync into a database that still needs an upgrade.
    return 0 if ($self->{db_version} < $self->DB_VERSION);

    my $conf = $self->{bayes}->{main}->{conf};
    return 0 if ($conf->{bayes_journal_max_size} == 0);

    my @vars      = $self->get_storage_variables();
    my $last_sync = $vars[7];
    dbg("Bayes DB journal sync: last sync: " . $last_sync, 'bayes', '-1');

    # stat() once; the "_" handle below reuses its result.
    return 0 unless (stat($self->_get_journal_filename()) && -f _);

    my $too_big = ((-s _) > $conf->{bayes_journal_max_size});
    return 1 if $too_big;

    my $too_old = (($last_sync > 0) && ((time() - $last_sync) > 86400));
    return 1 if $too_old;

    return 0;
}
# Look up the flag stored for a message id in the "seen" database.
# Returns undef when the id is not present.
sub seen_get {
    my $self  = shift;
    my $msgid = shift;

    return $self->{db_seen}->{$msgid};
}
# Record the flag $seen for $msgid.  When learning to the journal is
# enabled the change is only queued as an "m" journal line; otherwise it
# is written straight to the "seen" database.
sub seen_put {
    my ($self, $msgid, $seen) = @_;

    return $self->defer_update("m $seen $msgid")
        if $self->{bayes}->{main}->{learn_to_journal};

    return $self->_seen_put_direct($msgid, $seen);
}
# Store the flag $seen for $msgid directly in the "seen" database.
sub _seen_put_direct {
    my $self = shift;
    my ($msgid, $seen) = @_;

    return $self->{db_seen}->{$msgid} = $seen;
}
# Forget $msgid.  When learning to the journal is enabled the removal is
# queued as an "m f" journal line; otherwise the entry is deleted from the
# "seen" database right away.
sub seen_delete {
    my ($self, $msgid) = @_;

    return $self->defer_update("m f $msgid")
        if $self->{bayes}->{main}->{learn_to_journal};

    return $self->_seen_delete_direct($msgid);
}
# Remove $msgid from the "seen" database; returns the removed flag, if any.
sub _seen_delete_direct {
    my $self  = shift;
    my $msgid = shift;

    return delete $self->{db_seen}->{$msgid};
}
# Fetch the packed record stored for $tok and return whatever tok_unpack
# makes of it (callers in this file read it as spam count, ham count and
# access time).
sub tok_get {
    my $self = shift;
    my $tok  = shift;

    my $packed = $self->{db_toks}->{$tok};
    return $self->tok_unpack($packed);
}
# Look up several tokens at once.  Returns a reference to an array holding
# one [ token, spam count, ham count, access time ] entry per requested
# token, in the order the tokens were given.
sub tok_get_all {
    my ($self, @tokens) = @_;

    my @rows = map {
        my ($spam, $ham, $atime) = $self->tok_unpack($self->{db_toks}->{$_});
        [ $_, $spam, $ham, $atime ];
    } @tokens;

    return \@rows;
}
# Read the bookkeeping entries of the toks database, whatever format
# version it is in, and return them as an 11-element list:
#
#   0: scan-count base (always 0 from v2 on)   6: database version
#   1: number of spam messages                 7: last journal sync
#   2: number of ham messages                  8: last atime delta
#   3: number of tokens                        9: last expire reduction
#   4: last expire time                       10: newest token age
#   5: oldest token age
#
# Elements 7-10 do not exist before v2 and are reported as 0.  Any value
# that is false or contains a non-digit is returned as 0.
sub get_storage_variables {
    my ($self) = @_;

    my $db_ver = $self->{db_toks}->{$DB_VERSION_MAGIC_TOKEN};
    $db_ver = 0 if (!$db_ver || $db_ver =~ /\D/);

    my $toks = $self->{db_toks};
    my @values;

    if ($db_ver >= 2) {
        @values = (
            0,
            @{$toks}{
                "\015\001\007\011\003NSPAM",
                "\015\001\007\011\003NHAM",
                "\015\001\007\011\003NTOKENS",
                "\015\001\007\011\003LASTEXPIRE",
                "\015\001\007\011\003OLDESTAGE",
            },
            $db_ver,
            @{$toks}{
                "\015\001\007\011\003LASTJOURNALSYNC",
                "\015\001\007\011\003LASTATIMEDELTA",
                "\015\001\007\011\003LASTEXPIREREDUCE",
                "\015\001\007\011\003NEWESTAGE",
            },
        );
    }
    elsif ($db_ver == 0) {
        # v0 used plain "**NAME" keys.
        @values = (
            @{$toks}{
                '**SCANBASE',
                '**NSPAM',
                '**NHAM',
                '**NTOKENS',
                '**LASTEXPIRE',
                '**OLDESTAGE',
            },
            0,
            0, 0, 0, 0,
        );
    }
    elsif ($db_ver == 1) {
        # v1 already used the control-byte prefix but kept the v0 fields.
        @values = (
            @{$toks}{
                "\015\001\007\011\003SCANBASE",
                "\015\001\007\011\003NSPAM",
                "\015\001\007\011\003NHAM",
                "\015\001\007\011\003NTOKENS",
                "\015\001\007\011\003LASTEXPIRE",
                "\015\001\007\011\003OLDESTAGE",
            },
            1,
            0, 0, 0, 0,
        );
    }

    # Normalise missing or non-numeric values to 0.
    for my $value (@values) {
        $value = 0 if (!$value || $value =~ /\D/);
    }

    return @values;
}
# Print one line per token using the printf format $template, which is
# given the token's probability, spam count, ham count, access time and
# the hex-encoded token, in that order.
#
# Bookkeeping entries are skipped, and so are tokens that do not match
# $regex when one is supplied.  $vars[1] and $vars[2] are passed on to the
# probability computation; a false probability is printed as 0.5.
sub dump_db_toks {
    my ($self, $template, $regex, @vars) = @_;

    my $bayes = $self->{bayes};

    while (my ($tok, $tokvalue) = each %{ $self->{db_toks} }) {
        next if ($tok =~ MAGIC_RE);
        next if (defined $regex && ($tok !~ /$regex/o));

        my ($ts, $th, $atime) = $self->tok_unpack($tokvalue);

        my $prob = $bayes->compute_prob_for_token($tok, $vars[1], $vars[2], $ts, $th) || 0.5;

        printf($template, $prob, $ts, $th, $atime, unpack("H*", $tok));
    }
}
# Record when the last token expiry ran.
#
# $time is the timestamp to store.  It used to be accepted but silently
# ignored in favour of the current time; it is now honoured, and the
# current time is still used when the caller passes nothing.
sub set_last_expire {
    my ($self, $time) = @_;

    $time = time() unless defined $time;
    $self->{db_toks}->{$LAST_EXPIRE_MAGIC_TOKEN} = $time;
}
# Return the timestamp stored in the "expiry running" marker, or undef
# when the marker is absent, false, or contains anything but digits.
sub get_running_expire_tok {
    my ($self) = @_;

    my $stamp = $self->{db_toks}->{$RUNNING_EXPIRE_MAGIC_TOKEN};

    return undef unless ($stamp && $stamp !~ /\D/);
    return $stamp;
}
# Signal that a long-running operation is still alive: refresh the lock on
# the locked file and stamp the "expiry running" marker with the current
# time.
sub set_running_expire_tok {
    my $self = shift;

    my $locker = $self->{bayes}->{main}->{locker};
    $locker->refresh_lock($self->{locked_file});

    return $self->{db_toks}->{$RUNNING_EXPIRE_MAGIC_TOKEN} = time();
}
# Remove the "expiry running" marker from the toks database.
sub remove_running_expire_tok {
    my $self = shift;

    return delete $self->{db_toks}->{$RUNNING_EXPIRE_MAGIC_TOKEN};
}
# Adjust the spam ($ds) and ham ($dh) counts of $tok.  $atime defaults to
# 0 when not given.  With journal learning enabled the change is queued as
# a "c" journal line carrying the hex-encoded token; otherwise the
# counters are updated in the database immediately.
sub tok_count_change {
    my ($self, $ds, $dh, $tok, $atime) = @_;

    $atime = 0 if !defined $atime;

    return $self->tok_sync_counters($ds, $dh, $atime, $tok)
        unless $self->{bayes}->{main}->{learn_to_journal};

    my $hex = unpack("H*", $tok);
    return $self->defer_update("c $ds $dh $atime $hex");
}
# Return (number of spam messages, number of ham messages) as recorded in
# the database's bookkeeping entries.
sub nspam_nham_get {
    my $self = shift;

    my @stats = $self->get_storage_variables();
    return @stats[1, 2];
}
# Adjust the learned-message counters by $ds (spam) and $dh (ham).  With
# journal learning enabled the change is queued as an "n" journal line;
# otherwise it is applied to the database immediately.
sub nspam_nham_change {
    my ($self, $ds, $dh) = @_;

    return $self->defer_update("n $ds $dh")
        if $self->{bayes}->{main}->{learn_to_journal};

    return $self->tok_sync_nspam_nham($ds, $dh);
}
# Queue a "t" journal line recording that $tok was accessed at $atime.
# The token is written hex-encoded.
sub tok_touch {
    my ($self, $tok, $atime) = @_;

    my $hex = unpack("H*", $tok);
    return $self->defer_update("t $atime $hex");
}
# Queue one "t" journal line per token in the array referenced by
# $tokens, all with the same access time $atime.
sub tok_touch_all {
    my ($self, $tokens, $atime) = @_;

    for my $token (@{$tokens}) {
        my $hex = unpack("H*", $token);
        $self->defer_update("t $atime $hex");
    }
}
# Append one line (a newline is added here) to the in-memory buffer of
# pending journal entries.
sub defer_update {
    my $self = shift;
    my $str  = shift;

    $self->{string_to_journal} .= "$str\n";
}
# Flush the buffered journal lines to the journal file.
#
# Nothing happens when the buffer is empty.  The whole buffer is written
# with a single syswrite; after a partial write the file is truncated back
# to where it was and the write is retried, pausing one second between
# attempts, until too many attempts have failed.  The buffer is emptied
# afterwards whether or not the write succeeded, so a failed update is
# dropped (with a warning) rather than retried later.
sub cleanup {
    my ($self) = @_;

    my $nbytes = length($self->{string_to_journal});
    return if ($nbytes == 0);

    my $path = $self->_get_journal_filename();

    # Create the journal with the configured bayes_file_mode.
    my $conf  = $self->{bayes}->{main}->{conf};
    my $umask = umask(0777 - (oct($conf->{bayes_file_mode}) & 0666));

    # Three-argument open with a lexical handle: the path can never be
    # misread as part of the open mode.
    my $out;
    if (!open($out, '>>', $path)) {
        warn "cannot write to $path, Bayes db update ignored: $!\n";
        umask $umask;
        return;
    }
    umask $umask;

    my $write_failure  = 0;
    my $original_point = tell($out);
    my $len;

    # This used to be a "do { ... } while" block that bailed out with
    # "last".  A do-block is not a loop as far as loop control is
    # concerned, so that "last" would either die or terminate a loop in
    # the caller.  A real, labelled loop makes the early exits legal.
    WRITE:
    while (1) {
        $len = syswrite($out, $self->{string_to_journal}, $nbytes);

        # Hard write error: give up.
        if (!defined $len || $len < 0) {
            $len = 0 unless (defined $len);
            warn "write failed to Bayes journal $path ($len of $nbytes)!\n";
            last WRITE;
        }

        # Everything was written.
        last WRITE if ($len == $nbytes);

        # Partial write: roll the file back so no torn entry is left
        # behind, then try again.
        warn "partial write to Bayes journal $path ($len of $nbytes), recovering.\n";

        if (!truncate($out, $original_point) || ($write_failure++ > 4)) {
            warn "cannot write to Bayes journal $path, aborting!\n";
            last WRITE;
        }

        sleep 1;
    }

    if (!close($out)) {
        warn "cannot write to $path, Bayes db update ignored\n";
    }

    $self->{string_to_journal} = '';
}
# Return the regular expression that recognises bookkeeping entries for
# the database format in use: the "**NAME" pattern for a version-0
# database, MAGIC_RE for version 1 and later or when the version is not
# known yet.
sub get_magic_re {
    my $self = shift;

    my $version = $self->{db_version};

    return qr/^\*\*[A-Z]+$/ if (defined $version && $version < 1);
    return MAGIC_RE;
}
# Public entry point for merging the journal into the database; simply
# hands $opts on to _sync_journal and returns its result.
sub sync {
    my $self = shift;
    my $opts = shift;

    return $self->_sync_journal($opts);
}
# Merge the journal into the database, trapping any exception raised on
# the way.
#
# Returns 0 when there is no non-empty journal file or when an exception
# was caught (it is reported with warn); otherwise returns the result of
# _sync_journal_trapped, or 0 when the databases could not be tied
# writable.  The databases are untied afterwards unless the caller
# announced, via learn_caller_will_untie, that it will do so itself.
sub _sync_journal {
    my ($self, $opts) = @_;

    my $result = 0;
    my $path   = $self->_get_journal_filename();

    # Nothing to do without a regular, non-empty journal file.
    return 0 unless (stat($path) && -f _ && !-z _);

    eval {
        local $SIG{'__DIE__'};    # keep outside die handlers out of this

        $result = $self->_sync_journal_trapped($opts, $path)
            if $self->tie_db_writable();
    };
    my $err = $@;

    $self->untie_db()
        unless $self->{bayes}->{main}->{learn_caller_will_untie};

    if ($err) {
        warn "bayes: $err\n";
        return 0;
    }

    return $result;
}
# Replay the journal file $path into the tied databases.
#
# The journal is renamed to "$path.old" first and then read line by line:
#   t ATIME HEXTOKEN       token touched; only the newest time per token
#                          is kept and applied after the whole file is read
#   c DS DH ATIME HEXTOKEN spam/ham count change for a token
#   n DS DH                change of the learned-message counters
#   m FLAG MSGID           "seen" entry; flag "f" removes the entry, "h"
#                          and "s" store it
# Anything else is reported as gibberish and skipped.  When done, the
# retired journal is removed and the last-sync timestamp is recorded.
#
# $opts->{showdots} prints progress dots to STDERR; $opts->{verbose}
# prints the summary instead of sending it to dbg.
#
# Returns 1 on success, 0 when the journal is missing, empty, unreadable,
# or cannot be renamed or opened.
sub _sync_journal_trapped {
    my ($self, $opts, $path) = @_;

    # Flag that we are working, and refresh the lock.
    $self->set_running_expire_tok();

    my $started     = time();
    my $count       = 0;
    my $total_count = 0;
    my %tokens      = ();
    my $showdots    = $opts->{showdots};
    my $retirepath  = $path . ".old";

    # Nothing to do without a regular, non-empty journal file.
    if (!stat($path) || !-f _ || -z _) { return 0; }

    if (!-r $path) {
        warn "bayes: bad permissions on journal, can't read: $path\n";
        return 0;
    }

    # Ignore signals while the journal is retired and replayed.
    {
        local $SIG{'INT'}  = 'IGNORE';
        local $SIG{'TERM'} = 'IGNORE';
        local $SIG{'HUP'}  = 'IGNORE' if (!Mail::SpamAssassin::Util::am_running_on_windows());

        # Move the journal aside so new entries go to a fresh file.
        if (!rename($path, $retirepath)) {
            warn "bayes: failed rename $path to $retirepath\n";
            return 0;
        }

        # Three-argument open with a lexical handle: the path can never be
        # misread as part of the open mode.
        my $journal;
        if (!open($journal, '<', $retirepath)) {
            warn "bayes: cannot open read $retirepath\n";
            return 0;
        }

        # Read into a lexical instead of the global $_, which
        # "while (<FH>)" would overwrite for every caller up the stack.
        while (defined(my $line = <$journal>)) {
            $total_count++;

            if ($line =~ /^t (\d+) (.+)$/) {
                my ($atime, $hex) = ($1 + 0, $2);
                my $tok = pack("H*", $hex);

                # Only the most recent touch per token matters.
                $tokens{$tok} = $atime
                    if (!exists $tokens{$tok} || $atime > $tokens{$tok});
            }
            elsif ($line =~ /^c (-?\d+) (-?\d+) (\d+) (.+)$/) {
                my ($ds, $dh, $atime, $hex) = ($1 + 0, $2 + 0, $3 + 0, $4);
                $self->tok_sync_counters($ds, $dh, $atime, pack("H*", $hex));
                $count++;
            }
            elsif ($line =~ /^n (-?\d+) (-?\d+)$/) {
                my ($ds, $dh) = ($1 + 0, $2 + 0);
                $self->tok_sync_nspam_nham($ds, $dh);
                $count++;
            }
            elsif ($line =~ /^m ([hsf]) (.+)$/) {
                my ($flag, $msgid) = ($1, $2);
                if ($flag eq "f") {
                    $self->_seen_delete_direct($msgid);
                }
                else {
                    $self->_seen_put_direct($msgid, $flag);
                }
                $count++;
            }
            else {
                warn "Bayes journal: gibberish entry found: $line";
            }
        }
        close $journal;

        # Now apply the collected touches, one per distinct token.
        while (my ($tok, $atime) = each %tokens) {
            $self->tok_touch_token($atime, $tok);

            if ((++$count % 1000) == 0) {
                if ($showdots) { print STDERR "."; }
                $self->set_running_expire_tok();
            }
        }

        if ($showdots) { print STDERR "\n"; }

        # The retired journal is no longer needed.
        unlink($retirepath) || warn "bayes: can't unlink $retirepath: $!\n";

        $self->{db_toks}->{$LAST_JOURNAL_SYNC_MAGIC_TOKEN} = $started;

        my $done = time();
        my $msg  = ("synced Bayes databases from journal in " . ($done - $started) . " seconds: $count unique entries ($total_count total entries)");

        if ($opts->{verbose}) {
            print $msg, "\n";
        }
        else {
            dbg($msg);
        }
    }

    return 1;
}
# Move the access time of $tok forward to $atime.  Does nothing when the
# stored access time is already the same or newer.
sub tok_touch_token {
    my ($self, $atime, $tok) = @_;

    my ($ts, $th, $stored_atime) = $self->tok_get($tok);

    return $self->tok_put($tok, $ts, $th, $atime) if ($stored_atime < $atime);
    return;
}
# Apply the deltas $ds (spam) and $dh (ham) to the counts stored for $tok
# and write the token back.  Counts never go below zero, and the stored
# access time is kept when it is newer than $atime.
sub tok_sync_counters {
    my ($self, $ds, $dh, $atime, $tok) = @_;

    my ($ts, $th, $stored_atime) = $self->tok_get($tok);

    $ts += $ds;
    $ts = 0 if ($ts < 0);

    $th += $dh;
    $th = 0 if ($th < 0);

    my $new_atime = ($stored_atime > $atime) ? $stored_atime : $atime;

    $self->tok_put($tok, $ts, $th, $new_atime);
}
# Store the counts and access time for $tok and keep the bookkeeping
# entries consistent.
#
# Missing counts are treated as 0, and bookkeeping keys are never written
# through this method.  A token whose counts are both 0 is deleted (and
# the token total decremented) if it exists.  Otherwise the token is
# written, the token total is incremented for a new token, and the
# newest / oldest token-age entries are widened to include $atime.
sub tok_put {
    my ($self, $tok, $ts, $th, $atime) = @_;

    $ts ||= 0;
    $th ||= 0;

    # Never let a real token overwrite a bookkeeping entry.
    return if ($tok =~ MAGIC_RE);

    my $known = defined $self->{db_toks}->{$tok};
    my $db    = $self->{db_toks};

    if ($ts == 0 && $th == 0) {
        # Nothing stored and nothing to store.
        return unless $known;

        $db->{$NTOKENS_MAGIC_TOKEN}--;
        delete $db->{$tok};
    }
    else {
        $db->{$NTOKENS_MAGIC_TOKEN}++ unless $known;

        $db->{$tok} = $self->tok_pack($ts, $th, $atime);

        my $newest = $db->{$NEWEST_TOKEN_AGE_MAGIC_TOKEN};
        if (!defined($newest) || $atime > $newest) {
            $db->{$NEWEST_TOKEN_AGE_MAGIC_TOKEN} = $atime;
        }

        # An empty string counts as "not set" for the oldest age.
        my $oldest = $db->{$OLDEST_TOKEN_AGE_MAGIC_TOKEN};
        if (!defined($oldest) || $oldest eq "" || $atime < $oldest) {
            $db->{$OLDEST_TOKEN_AGE_MAGIC_TOKEN} = $atime;
        }
    }
}
# Apply the deltas $ds and $dh to the stored numbers of learned spam and
# ham messages and write both back.  Neither counter goes below zero.
sub tok_sync_nspam_nham {
    my ($self, $ds, $dh) = @_;

    my ($nspam, $nham) = ($self->get_storage_variables())[1, 2];

    $nspam += $ds if $ds;
    $nspam = 0 if ($nspam < 0);

    $nham += $dh if $dh;
    $nham = 0 if ($nham < 0);

    $self->{db_toks}->{$NSPAM_MAGIC_TOKEN} = $nspam;
    $self->{db_toks}->{$NHAM_MAGIC_TOKEN}  = $nham;
}
# Return the journal's filename: the configured bayes_path with "_journal"
# appended, passed through the main object's sed_path.
sub _get_journal_filename {
    my $self = shift;

    my $main = $self->{bayes}->{main};
    my $base = $main->{conf}->{bayes_path};

    return $main->sed_path($base . "_journal");
}
# Import old-format bayes_seen / bayes_toks files into fresh databases.
#
# The existing files in the bayes directory are copied to "old_<name>",
# the originals are removed, new databases are created by tying writable,
# and the contents of the copies are imported.  Progress and the final
# verdict are printed to standard output.
#
# Returns 2 when both databases were imported, a smaller number when one
# or both imports copied nothing, and 0 when an exception was caught (it
# is reported with warn).  The databases are untied before returning.
sub perform_upgrade {
    my ($self, $opts) = @_;

    my $ret = 0;

    eval {
        local $SIG{'__DIE__'};    # keep outside die handlers out of this

        my $main = $self->{bayes}->{main};
        my $path = $main->sed_path($main->{conf}->{bayes_path});
        my $dir  = dirname($path);

        # Lexical directory handle instead of the global bareword DIR.
        opendir(my $dirh, $dir) || die "can't opendir $dir: $!";
        my @files = grep { /^bayes_(?:seen|toks)(?:\.\w+)?$/ } readdir($dirh);
        closedir($dirh);

        # Both databases must be present.
        if (@files < 2 || !grep(/bayes_seen/, @files) || !grep(/bayes_toks/, @files)) {
            die "unable to find bayes_toks and bayes_seen, stopping\n";
        }

        # Untaint the names (they already passed the strict pattern above).
        # The previous form, map { /(.*)/, $1 }, returned both the match
        # list and $1, so every name appeared twice and each file was
        # copied and unlinked twice.
        @files = map { /(.*)/ ? $1 : () } @files;

        # Keep a copy of every original under an "old_" prefix ...
        for my $file (@files) {
            my $src = "$dir/$file";
            my $dst = "$dir/old_$file";
            copy($src, $dst) || die "can't copy $src to $dst: $!\n";
        }

        # ... and remove the originals so new databases can be created.
        for my $file (@files) {
            unlink("$dir/$file");
        }

        # Create the new databases and import the saved copies.
        if ($self->tie_db_writable()) {
            $ret += $self->upgrade_old_dbm_files_trapped("$dir/old_bayes_seen", $self->{db_seen});
            $ret += $self->upgrade_old_dbm_files_trapped("$dir/old_bayes_toks", $self->{db_toks});
        }

        if ($ret == 2) {
            print "import successful, original files saved with \"old\" prefix\n";
        }
        else {
            print "import failed, original files saved with \"old\" prefix\n";
        }
    };
    my $err = $@;

    $self->untie_db();

    if ($err) {
        warn "bayes perform_upgrade: $err\n";
        return 0;
    }

    $ret;
}
# Copy the contents of the old database file $filename into the hash
# referenced by $output, trying several DBM modules in turn.
#
# For each candidate module a piece of code is evaluated as a string that
# loads the module, ties %in to $filename read-only with it, assigns the
# whole of %in to %$output, counts the keys now in %$output and unties
# %in again.  The string form is needed because the module name is only
# known at run time; the code inside it refers to this sub's lexicals
# (%in, $filename, $output, $count).
#
# Returns 1 as soon as one module produced at least one entry, and 0 when
# none of them did.  Progress is printed to standard output.
sub
upgrade_old_dbm_files_trapped {
my
(
$self
,
$filename
,
$output
) =
@_
;
my
$count
;
my
%in
;
print
"upgrading to DB_File, please be patient: $filename\n"
;
# Candidate modules, tried in this order.
for
my
$dbm
(
'DB_File'
,
'GDBM_File'
,
'NDBM_File'
,
'SDBM_File'
) {
$count
= 0;
# NOTE: the result of tie is not checked inside the evaluated code, so a
# tie that fails leaves %in empty; %$output is then assigned an empty
# list and $count stays 0, which is reported below as "no database of
# that kind found".
eval
'use '
.
$dbm
. ';
tie
%in
,
"' . $dbm . '"
,
$filename
, O_RDONLY, 0600;
%{
$output
} =
%in
;
$count
=
scalar
keys
%{
$output
};
untie
%in
;
';
# Any error raised by the evaluated code (typically the module failing to
# load) is reported as "module not installed"; the real error text only
# goes to the debug log.
if
($@) {
print
"$dbm: $dbm module not installed, nothing copied.\n"
;
dbg(
"error was: $@"
);
}
elsif
(
$count
== 0) {
print
"$dbm: no database of that kind found, nothing copied.\n"
;
}
else
{
print
"$dbm: copied $count entries.\n"
;
return
1;
}
}
return
0;
}
# Remove the Bayes database files and the journal.
#
# The databases are tied writable first (which takes the lock) and untied
# again afterwards.  Returns 0 when that tie fails and 1 otherwise.
sub clear_database {
    my ($self) = @_;

    return 0 unless ($self->tie_db_writable());

    my $main = $self->{bayes}->{main};
    my $path = $main->sed_path($main->{conf}->{bayes_path});

    foreach my $dbname (@DBNAMES, 'journal') {
        # The rest of this file probes every suffix in @DB_EXTENSIONS when
        # looking for database files, so files carrying such a suffix have
        # to be removed here as well; removing only the bare name would
        # leave them behind.
        foreach my $ext (@DB_EXTENSIONS) {
            unlink($path . '_' . $dbname . $ext);
        }
        dbg("bayes: clear_database: removing $dbname");
    }

    $self->untie_db();

    return 1;
}
# Dump the databases to standard output in a tab-separated text format:
#
#   v <value> <name>                     database version (first line),
#                                        then num_spam and num_nonspam
#   t <spam> <ham> <atime> <hex token>   one line per token
#   s <flag> <msgid>                     one line per "seen" entry
#
# The databases are tied writable for the dump and untied afterwards.
# Returns 0 when they cannot be tied and 1 otherwise.
sub backup_database {
    my ($self) = @_;

    $self->tie_db_writable() or return 0;

    my @stats = $self->get_storage_variables();
    my ($db_version, $num_spam, $num_ham) = @stats[6, 1, 2];

    print "v\t$db_version\tdb_version # this must be the first line!!!\n";
    print "v\t$num_spam\tnum_spam\n";
    print "v\t$num_ham\tnum_nonspam\n";

    # Tokens, without the bookkeeping entries.
    while (my ($tok, $packed) = each %{ $self->{db_toks} }) {
        next if ($tok =~ MAGIC_RE);

        my ($ts, $th, $atime) = $self->tok_unpack($packed);
        my $hex = unpack("H*", $tok);
        print "t\t$ts\t$th\t$atime\t$hex\n";
    }

    # Seen message ids.
    while (my ($msgid, $flag) = each %{ $self->{db_seen} }) {
        print "s\t$flag\t$msgid\n";
    }

    $self->untie_db();

    return 1;
}
sub
restore_database {
my
(
$self
,
$filename
,
$showdots
) =
@_
;
if
(!
open
(DUMPFILE,
'<'
,
$filename
)) {
dbg(
"bayes: Unable to open backup file $filename: $!"
);
return
0;
}
if
(!
$self
->tie_db_writable()) {
dbg(
"bayes: failed to tie db writable"
);
return
0;
}
my
$main
=
$self
->{bayes}->{main};
my
$path
=
$main
->sed_path (
$main
->{conf}->{bayes_path});
my
$tmpsuffix
=
"convert$$"
;
my
$tmptoksdbname
=
$path
.
'_toks.'
.
$tmpsuffix
;
my
$tmpseendbname
=
$path
.
'_seen.'
.
$tmpsuffix
;
my
$toksdbname
=
$path
.
'_toks'
;
my
$seendbname
=
$path
.
'_seen'
;
my
%new_toks
;
my
%new_seen
;
my
$umask
=
umask
0;
unless
(
tie
%new_toks
,
"DB_File"
,
$tmptoksdbname
, O_RDWR|O_CREAT|O_EXCL,
(
oct
(
$main
->{conf}->{bayes_file_mode}) & 0666)) {
dbg(
"bayes: Failed to tie temp toks db: $!"
);
$self
->untie_db();
umask
$umask
;
return
0;
}
unless
(
tie
%new_seen
,
"DB_File"
,
$tmpseendbname
, O_RDWR|O_CREAT|O_EXCL,
(
oct
(
$main
->{conf}->{bayes_file_mode}) & 0666)) {
dbg(
"bayes: Failed to tie temp seen db: $!"
);
untie
%new_toks
;
unlink
$tmptoksdbname
;
$self
->untie_db();
umask
$umask
;
return
0;
}
umask
$umask
;
my
$line_count
= 0;
my
$db_version
;
my
$token_count
= 0;
my
$num_spam
;
my
$num_ham
;
my
$error_p
= 0;
my
$newest_token_age
= 0;
my
$oldest_token_age
=
time
() + 100000;
my
$line
= <DUMPFILE>;
$line_count
++;
if
(
$line
=~ m/^v\s+(\d+)\s+db_version/) {
$db_version
= $1;
}
else
{
dbg(
"bayes: Database Version must be the first line in the backup file, correct and re-run."
);
untie
%new_toks
;
untie
%new_seen
;
unlink
$tmptoksdbname
;
unlink
$tmpseendbname
;
$self
->untie_db();
return
0;
}
unless
(
$db_version
== 2 ||
$db_version
== 3) {
warn
(
"bayes: Database Version $db_version is unsupported, must be version 2 or 3."
);
untie
%new_toks
;
untie
%new_seen
;
unlink
$tmptoksdbname
;
unlink
$tmpseendbname
;
$self
->untie_db();
return
0;
}
while
(
my
$line
= <DUMPFILE>) {
chomp
(
$line
);
$line_count
++;
if
(
$line_count
% 1000 == 0) {
print
STDERR
"."
if
(
$showdots
);
}
if
(
$line
=~ /^v\s+/) {
my
@parsed_line
=
split
(/\s+/,
$line
, 3);
my
$value
=
$parsed_line
[1] + 0;
if
(
$parsed_line
[2] eq
'num_spam'
) {
$num_spam
=
$value
;
}
elsif
(
$parsed_line
[2] eq
'num_nonspam'
) {
$num_ham
=
$value
;
}
else
{
dbg(
"bayes: restore_database: Skipping unknown line: $line"
);
}
}
elsif
(
$line
=~ /^t\s+/) {
my
@parsed_line
=
split
(/\s+/,
$line
, 5);
my
$spam_count
=
$parsed_line
[1] + 0;
my
$ham_count
=
$parsed_line
[2] + 0;
my
$atime
=
$parsed_line
[3] + 0;
my
$token
=
$parsed_line
[4];
my
$token_warn_p
= 0;
my
@warnings
;
if
(
$spam_count
< 0) {
$spam_count
= 0;
push
(
@warnings
,
'Spam Count < 0, resetting'
);
$token_warn_p
= 1;
}
if
(
$ham_count
< 0) {
$ham_count
= 0;
push
(
@warnings
,
'Ham Count < 0, resetting'
);
$token_warn_p
= 1;
}
if
(
$spam_count
== 0 &&
$ham_count
== 0) {
dbg(
"bayes: Token has zero spam and ham count, skipping."
);
next
;
}
if
(
$atime
>
time
()) {
$atime
=
time
();
push
(
@warnings
,
'atime > current time, resetting'
);
$token_warn_p
= 1;
}
if
(
$token_warn_p
) {
dbg(
"bayes: Token ($token) has the following warnings:\n"
.
join
(
"\n"
,
@warnings
));
}
if
(
$db_version
< 3) {
$token
=
substr
(sha1(
$token
), -5);
}
else
{
$token
=
pack
(
"H*"
,
$token
);
}
$new_toks
{
$token
} =
$self
->tok_pack(
$spam_count
,
$ham_count
,
$atime
);
if
(
$atime
<
$oldest_token_age
) {
$oldest_token_age
=
$atime
;
}
if
(
$atime
>
$newest_token_age
) {
$newest_token_age
=
$atime
;
}
$token_count
++;
}
elsif
(
$line
=~ /^s\s+/) {
my
@parsed_line
=
split
(/\s+/,
$line
, 3);
my
$flag
=
$parsed_line
[1];
my
$msgid
=
$parsed_line
[2];
unless
(
$flag
eq
'h'
||
$flag
eq
's'
) {
dbg(
"bayes: Unknown seen flag ($flag) for line: $line, skipping"
);
next
;
}
unless
(
$msgid
) {
dbg(
"bayes: Blank msgid for line: $line, skipping"
);
next
;
}
$new_seen
{
$msgid
} =
$flag
;
}
else
{
dbg(
"bayes: Skipping unknown line: $line"
);
next
;
}
}
close
(DUMPFILE);
print
STDERR
"\n"
if
(
$showdots
);
unless
(
defined
(
$num_spam
)) {
dbg(
"bayes: Unable to find num spam, please check file."
);
$error_p
= 1;
}
unless
(
defined
(
$num_ham
)) {
dbg(
"bayes: Unable to find num ham, please check file."
);
$error_p
= 1;
}
if
(
$error_p
) {
dbg(
"bayes: Error(s) while attempting to load $filename, correct and Re-Run"
);
untie
%new_toks
;
untie
%new_seen
;
unlink
$tmptoksdbname
;
unlink
$tmpseendbname
;
$self
->untie_db();
return
0;
}
$new_toks
{
$DB_VERSION_MAGIC_TOKEN
} =
$self
->DB_VERSION();
$new_toks
{
$NTOKENS_MAGIC_TOKEN
} =
$token_count
;
$new_toks
{
$NSPAM_MAGIC_TOKEN
} =
$num_spam
;
$new_toks
{
$NHAM_MAGIC_TOKEN
} =
$num_ham
;
$new_toks
{
$NEWEST_TOKEN_AGE_MAGIC_TOKEN
} =
$newest_token_age
;
$new_toks
{
$OLDEST_TOKEN_AGE_MAGIC_TOKEN
} =
$oldest_token_age
;
$new_toks
{
$LAST_EXPIRE_MAGIC_TOKEN
} = 0;
$new_toks
{
$LAST_JOURNAL_SYNC_MAGIC_TOKEN
} = 0;
$new_toks
{
$LAST_ATIME_DELTA_MAGIC_TOKEN
} = 0;
$new_toks
{
$LAST_EXPIRE_REDUCE_MAGIC_TOKEN
} = 0;
local
$SIG
{
'INT'
} =
'IGNORE'
;
local
$SIG
{
'TERM'
} =
'IGNORE'
;
local
$SIG
{
'HUP'
} =
'IGNORE'
if
(!Mail::SpamAssassin::Util::am_running_on_windows());
untie
%new_toks
;
untie
%new_seen
;
$self
->untie_db();
unless
(
rename
(
$tmptoksdbname
,
$toksdbname
)) {
dbg(
"bayes: Error while renaming $tmptoksdbname to $toksdbname: $!"
);
return
0;
}
unless
(
rename
(
$tmpseendbname
,
$seendbname
)) {
dbg(
"bayes: Error while renaming $tmpseendbname to $seendbname: $!"
);
dbg(
"bayes: Database now in inconsistent state."
);
return
0;
}
dbg(
"bayes: Parsed $line_count lines."
);
dbg(
"bayes: Created database with $token_count tokens based on $num_spam Spam Messages and $num_ham Ham Messages."
);
return
1;
}
# Decode a packed token record into its three components.
#
# Arguments:
#   $self  - store object; $self->{db_version} selects the unpack templates
#   $value - packed record as stored in the toks db (a false value is
#            replaced by 0 before unpacking)
#
# Returns the list (spam_count, ham_count, atime).  Any component that
# unpacks to a false/undefined value is returned as 0.  If the format bits
# are not recognised, a warning is emitted and (0, 0, 0) is returned.
sub tok_unpack {
    my ($self, $value) = @_;
    $value ||= 0;

    my $version = $self->{db_version};

    # First pass: read the leading flag byte together with the atime that
    # directly follows it in the one-byte layout.  Version 0 databases use
    # the native "S" template for atime; later versions use "V".
    my ($packed, $short_atime);
    if ($version >= 1) {
        ($packed, $short_atime) = unpack("CV", $value);
    }
    elsif ($version == 0) {
        ($packed, $short_atime) = unpack("CS", $value);
    }

    my $format = $packed & FORMAT_FLAG;

    # One-byte layout: both counts live inside the flag byte itself.
    if ($format == ONE_BYTE_FORMAT) {
        my $spam = ($packed & ONE_BYTE_SSS_BITS) >> 3;
        my $ham  = $packed & ONE_BYTE_HHH_BITS;
        return ($spam, $ham, $short_atime || 0);
    }

    # Two-longs layout: flag byte, then spam count, ham count and atime as
    # separate fields.  The record is unpacked again with the wider template;
    # the flag byte is discarded this time.
    if ($format == TWO_LONGS_FORMAT) {
        my ($spam, $ham, $long_atime);
        if ($version >= 1) {
            (undef, $spam, $ham, $long_atime) = unpack("CVVV", $value);
        }
        elsif ($version == 0) {
            (undef, $spam, $ham, $long_atime) = unpack("CLLS", $value);
        }
        return ($spam || 0, $ham || 0, $long_atime || 0);
    }

    # Any other layout is unknown to this code.
    warn "unknown packing format for Bayes db, please re-learn: $packed";
    return (0, 0, 0);
}
# Encode a token's counts and access time into the packed record format.
#
# Arguments:
#   $self  - store object (not consulted here)
#   $ts    - spam count for the token
#   $th    - ham count for the token
#   $atime - access time for the token
# False/undefined arguments are treated as 0.
#
# Returns the packed string.  When both counts are below 8 the compact
# "CV" layout is used, with the counts folded into the flag byte (spam
# count shifted left by 3, ham count in the low bits); otherwise the
# "CVVV" layout stores each count in its own 32-bit field.
sub tok_pack {
    my ($self, $ts, $th, $atime) = @_;

    # Default every missing/false component to 0 in place.
    $_ ||= 0 for ($ts, $th, $atime);

    my $fits_one_byte = ($ts < 8 && $th < 8);

    return pack("CVVV", TWO_LONGS_FORMAT, $ts, $th, $atime)
        unless $fits_one_byte;

    return pack("CV", ONE_BYTE_FORMAT | ($ts << 3) | $th, $atime);
}
# Report whether the database is currently tied.
#
# Returns the value of $self->{already_tied} unchanged.
sub db_readable {
    my $self = shift;
    return $self->{already_tied};
}
# Report whether the database is tied and locked.
#
# Returns $self->{already_tied} itself when that is false; otherwise
# returns the value of $self->{is_locked}.
sub db_writable {
    my $self = shift;

    my $tied = $self->{already_tied};
    return $tied unless $tied;

    return $self->{is_locked};
}
# Local shorthand: forwards all arguments to Mail::SpamAssassin::dbg and
# returns whatever that call returns.
sub dbg {
    return Mail::SpamAssassin::dbg(@_);
}
# Local shorthand: forwards all arguments to Mail::SpamAssassin::sa_die and
# returns whatever that call returns.
sub sa_die {
    return Mail::SpamAssassin::sa_die(@_);
}
1;