# This storage backend inherits the generic interface from the BayesStore
# base class.
our @ISA = ('Mail::SpamAssassin::BayesStore');

# File-scoped copies of the two BerkeleyDB flag constants that the
# routines below pass to db_get / c_get / c_pget.
my $rmw = DB_RMW;
my $next = DB_NEXT;
# Constructor.  Accepts either a class name or an existing instance as the
# invocant, delegates construction to the parent class, then initialises
# the bookkeeping flags used by this backend.
sub new {
  my ($proto, @args) = @_;
  my $class = ref($proto) || $proto;

  my $self = $class->SUPER::new(@args);

  $self->{supported_db_version} = 3;

  # Nothing is open yet, neither physically nor from the caller's view.
  $self->{$_} = 0 for qw(is_really_open is_writable is_officially_open);

  return $self;
}
# Destructor: physically closes the database handles via _close_db.
#
# The global status variables are localized first because _close_db runs
# an eval {} (and may touch the OS error state); without this, an implicit
# destruction of the object could overwrite $@ / $! / $? in the code that
# happened to drop the last reference.
sub DESTROY {
  my $self = shift;
  local($., $@, $!, $^E, $?);
  $self->_close_db;
}
# Make the database available for reading.  If it is already physically
# open that flag value is returned as-is; otherwise the result of
# _open_db(0) is returned.
sub tie_db_readonly {
  my $self = shift;

  my $result = $self->{is_really_open};
  $result = $self->_open_db(0) unless $result;

  dbg("bayes: tie_db_readonly, result is $result");
  return $result;
}
# Make the database available for writing.  When it is already open in
# writable mode nothing is reopened; otherwise _open_db(1) is called and
# its result returned.
sub tie_db_writable {
  my $self = shift;

  my $result = $self->{is_really_open} && $self->{is_writable};
  $result ||= $self->_open_db(1);

  dbg("bayes: tie_db_writable, result is $result");
  return $result;
}
# Open (and, when $writable is true, create if necessary) the BerkeleyDB
# environment and the four Btree tables: vars, tokens, atime and seen.
#
# Parameters:
#   $writable - true to open for writing (allows creation of the
#               directory, environment and tables), false for read-only.
#
# Returns 1 on success (or if the database is already physically open),
# 0 on any recoverable failure.  Dies if the initial vars records cannot
# be written or if the tokens/atime association fails.
sub _open_db {
  my ($self, $writable) = @_;

  dbg("bayes: _open_db(%s, %s); BerkeleyDB %s, libdb %s",
      $writable ? 'for writing' : 'for reading',
      $self->{is_really_open} ? 'already open' : 'not yet open',
      BerkeleyDB->VERSION, $BerkeleyDB::db_version);

  # The requested mode is recorded even when the handles are reused.
  $self->{is_writable} = $writable;
  return 1 if $self->{is_really_open};

  my $main = $self->{bayes}->{main};

  if (!defined($main->{conf}->{bayes_path})) {
    dbg("bayes: bayes_path not defined");
    return 0;
  }

  $self->read_db_configs();

  # The environment lives in the directory part of bayes_path.
  my $path = dirname($main->sed_path($main->{conf}->{bayes_path}));

  if (-d $path) {
    # directory already exists, nothing to do
  }
  elsif ($writable) {
    # Best effort: a failure here is reported, and the environment open
    # below will then fail and return 0.
    eval {
      mkpath($path, 0, (oct($main->{conf}->{bayes_file_mode}) & 0777));
    };
    warn("bayes: Couldn't create path: $@") if $@;
  }
  else {
    warn("bayes: bayes_path doesn't exist and can't create: $path");
    return 0;
  }

  my $flags = DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_MPOOL|DB_INIT_TXN;
  $flags |= DB_CREATE if $writable;

  dbg("bayes: %s environment: %s, 0x%x, %s",
      $writable ? 'Opening or creating' : 'Opening existing',
      $path, $flags, $main->{conf}->{bayes_file_mode});

  unless ($self->{env} = BerkeleyDB::Env->new(
            -Cachesize => 67108864,
            -Home      => $path,
            -Flags     => $flags,
            -Mode      => (oct($main->{conf}->{bayes_file_mode}) & 0666),
          )) {
    dbg("bayes: berkeleydb environment couldn't initialize: $BerkeleyDB::Error");
    return 0;
  }

  # From here on $flags holds the per-table open flags.
  $flags = $writable ? DB_CREATE : 0;

  # Open one Btree table inside the environment and store its handle.
  # On failure: warn (naming the table that actually failed), make sure
  # no stale handle entry is left behind, untie, and report false.
  my $open_table = sub {
    my ($name, @extra) = @_;
    my $db = BerkeleyDB::Btree->new(
      -Env      => $self->{env},
      -Filename => "$name.db",
      -Flags    => $flags,
      @extra,
    );
    if ($db) {
      $self->{handles}->{$name} = $db;
      return 1;
    }
    warn("bayes: couldn't open $name.db: $BerkeleyDB::Error");
    delete $self->{handles}->{$name};
    $self->untie_db;
    return 0;
  };

  $open_table->('vars') or return 0;

  # Check the stored format version, or initialise a brand-new database.
  $self->{db_version} = $self->_get(vars => "DB_VERSION");
  if ($self->{db_version}) {
    dbg("bayes: found bayes db version $self->{db_version}");
    if ($self->{db_version} != $self->DB_VERSION) {
      warn("bayes: bayes db version $self->{db_version} is not able to be used, aborting: $BerkeleyDB::Error");
      $self->untie_db();
      return 0;
    }
  }
  elsif ($writable) {
    $self->{db_version} = $self->DB_VERSION;
    $self->{handles}->{vars}->db_put(DB_VERSION => $self->{db_version}) == 0
      or die "Couldn't put record: $BerkeleyDB::Error";
    $self->{handles}->{vars}->db_put(NTOKENS => 0) == 0
      or die "Couldn't put record: $BerkeleyDB::Error";
    dbg("bayes: new db, set db version %s and 0 tokens", $self->{db_version});
  }
  else {
    warn("bayes: vars.db not initialized: $BerkeleyDB::Error");
    $self->untie_db;
    return 0;
  }

  $open_table->('tokens', -Property => DB_REVSPLITOFF) or return 0;
  $open_table->('atime', -Property => DB_DUP|DB_DUPSORT) or return 0;
  $open_table->('seen') or return 0;

  # Register atime as a secondary index of tokens; _extract_atime supplies
  # the secondary key.  A false (zero) status means success.
  my $status = $self->{handles}->{tokens}->associate(
    $self->{handles}->{atime}, \&_extract_atime);
  $status and die "Couldn't associate DBs: $BerkeleyDB::Error";

  $self->{is_really_open} = 1;
  $self->{is_officially_open} = 1;

  dbg("bayes: _open_db done");
  return 1;
}
# "Soft" close: marks the database as no longer officially open or
# writable, checkpoints the environment (if any) and syncs every open
# table handle.  The handles themselves stay open; see _close_db for the
# real close.  Dies if a sync fails.
sub untie_db {
  my ($self) = @_;

  dbg("bayes: pretend to be closing a database");

  $self->{is_writable} = 0;
  $self->{is_officially_open} = 0;

  if ($self->{env}) {
    $self->{env}->txn_checkpoint(128, 1);
  }

  for my $name (keys %{ $self->{handles} }) {
    my $db = $self->{handles}->{$name} or next;
    $db->db_sync == 0
      or die "Couldn't sync $name: $BerkeleyDB::Error";
  }

  return;
}
# Really close the database: reset all state flags, forget the cached db
# version, close and drop every table handle, then drop the environment.
# Errors from db_close are deliberately ignored (best effort).
sub _close_db {
  my ($self) = @_;

  dbg("bayes: really closing a database");

  $self->{$_} = 0 for qw(is_writable is_really_open is_officially_open);
  $self->{db_version} = undef;

  for my $name (keys %{ $self->{handles} }) {
    my $db = $self->{handles}->{$name};
    if ($db) {
      dbg("bayes: closing database $name");
      eval { $db->db_close };
    }
    delete $self->{handles}->{$name};
  }

  delete $self->{env};
  return;
}
# Build a histogram used to choose an expiry delta.
#
# Walks every key of the atime table and, for each power-of-two
# multiplier i (1, 2, 4, ... up to $max_expire_mult), counts the records
# whose age ($newest_atime - atime) is at least $start * i.
#
# Returns the histogram as a flat hash (multiplier => count).
# Dies if the cursor cannot be obtained or closed.
sub calculate_expire_delta {
  my ($self, $newest_atime, $start, $max_expire_mult) = @_;

  dbg("bayes: calculate_expire_delta starting");

  my %delta;

  my $cursor = $self->{handles}->{atime}->db_cursor;
  die "Couldn't get cursor: $BerkeleyDB::Error" unless $cursor;

  my $atime = "";
  my $value = "";
  while ($cursor->c_get($atime, $value, $next) == 0) {
    my $age = $newest_atime - $atime;

    # Thresholds only grow, so stop at the first one that is not reached.
    my $mult = 1;
    while ($mult <= $max_expire_mult) {
      last unless $age >= $start * $mult;
      $delta{$mult}++;
      $mult <<= 1;
    }
  }

  $cursor->c_close == 0
    or die "Couldn't close cursor: $BerkeleyDB::Error";
  undef $cursor;

  dbg("bayes: calculate_expire_delta done");
  return %delta;
}
# Expire old tokens.
#
# Parameters:
#   $opts     - not used by this implementation
#   $newdelta - atime delta; tokens older than $vars[10] - $newdelta are
#               candidates for removal
#   @vars     - storage variables as returned by get_storage_variables
#               (index 3 and index 10 are read here)
#
# Returns the list ($kept, $deleted, $hapaxes, $lowfreq).
# Dies on any cursor or put failure.
sub token_expiration {
  my ($self, $opts, $newdelta, @vars) = @_;

  dbg("bayes: Entering token_expiration");

  my ($kept, $deleted, $hapaxes, $lowfreq) = (0, 0, 0, 0);

  # Pass 1: starting at the first atime key >= $vars[10], rewrite each
  # token through the primary table using the key the cursor returned.
  {
    my $cursor = $self->{handles}->{atime}->db_cursor;
    die "Couldn't get cursor: $BerkeleyDB::Error" unless $cursor;

    my $atime = $vars[10];
    my $flag = DB_SET_RANGE|$rmw;
    while ($cursor->c_pget($atime, my $token, my $value, $flag) == 0) {
      my ($ts, $th, $current) = _unpack_token($value);
      my $repacked = _pack_token($ts, $th, $atime);
      $self->{handles}->{tokens}->db_put($token, $repacked) == 0
        or die "Couldn't put record: $BerkeleyDB::Error";
      # After the initial positioning, simply step forward.
      $flag = $next|$rmw;
    }

    $cursor->c_close == 0
      or die "Couldn't close cursor: $BerkeleyDB::Error";
    undef $cursor;
  }

  my $too_old = $vars[10] - $newdelta;
  dbg("bayes: Too old is $too_old");

  dbg("bayes: Getting db stats");
  my $count;
  {
    my $atime_db = $self->{handles}->{atime};

    # Try the cheap statistics first; fall back to a full stat when the
    # fast variant reports no data.
    my $stats = $atime_db->db_stat(DB_FAST_STAT);
    if ($stats->{bt_ndata} == 0) {
      $stats = $self->{handles}->{atime}->db_stat;
    }

    # NOTE(review): $count stays undefined when db_key_range fails.
    if ($self->{handles}->{atime}->db_key_range($too_old, my $less, my $equal, my $greater) == 0) {
      dbg("bayes: less is $less, equal is $equal, greater is $greater");
      $count = $stats->{bt_ndata} - $stats->{bt_ndata} * $greater;
    }
  }

  dbg("bayes: Considering deleting $vars[3], $count");

  if ($vars[3] - $count >= 100000) {
    dbg("bayes: Preparing to iterate");

    my $cursor = $self->{handles}->{atime}->db_cursor;
    die "Couldn't get cursor: $BerkeleyDB::Error" unless $cursor;

    my ($atime, $oldest, $token, $value);
    $atime = 0;

    while ($cursor->c_pget($atime, $token, $value, $next) == 0) {
      # Keys come back in atime order: the first one that is not too old
      # is the oldest surviving token.
      if ($atime >= $too_old) {
        $oldest = $atime;
        last;
      }

      dbg("bayes: Deleting record");
      $cursor->c_del;
      $deleted++;

      my ($ts, $th) = _unpack_token($value);
      if ($ts + $th == 1) {
        $hapaxes++;
      }
      elsif ($ts < 8 && $th < 8) {
        $lowfreq++;
      }
    }

    dbg("bayes: Done with cursor");
    $cursor->c_close == 0
      or die "Couldn't close cursor: $BerkeleyDB::Error";
    undef $cursor;

    $kept = $self->_get(vars => "NTOKENS", $rmw) - $deleted;

    $self->{handles}->{vars}->db_put(NTOKENS => $kept) == 0
      or die "Couldn't put record: $BerkeleyDB::Error";
    $self->{handles}->{vars}->db_put(LAST_EXPIRE => time) == 0
      or die "Couldn't put record: $BerkeleyDB::Error";
    # NOTE(review): $oldest is undefined if the loop ran off the end of
    # the table without finding a surviving token.
    $self->{handles}->{vars}->db_put(OLDEST_TOKEN_AGE => $oldest) == 0
      or die "Couldn't put record: $BerkeleyDB::Error";
    $self->{handles}->{vars}->db_put(LAST_EXPIRE_REDUCE => $deleted) == 0
      or die "Couldn't put record: $BerkeleyDB::Error";
    $self->{handles}->{vars}->db_put(LAST_ATIME_DELTA => $newdelta) == 0
      or die "Couldn't put record: $BerkeleyDB::Error";
  }
  else {
    dbg("bayes: Update vars to regenerate histogram");

    # Nothing deleted: only record that an expiry run took place.
    $kept = $self->_get(vars => "NTOKENS");

    $self->{handles}->{vars}->db_put(LAST_EXPIRE => time) == 0
      or die "Couldn't put record: $BerkeleyDB::Error";
    $self->{handles}->{vars}->db_put(LAST_ATIME_DELTA => 0) == 0
      or die "Couldn't put record: $BerkeleyDB::Error";
    $self->{handles}->{vars}->db_put(LAST_EXPIRE_REDUCE => 0) == 0
      or die "Couldn't put record: $BerkeleyDB::Error";
  }

  dbg("bayes: token_expiration done");
  return ($kept, $deleted, $hapaxes, $lowfreq);
}
# This backend never reports a sync as being due: always returns 0.
sub sync_due { return 0 }
# Look up the stored flag for a message id in the seen table.
# Returns the stored value, or undef when the id is not present.
sub seen_get {
  my $self = shift;
  my ($msgid) = @_;

  dbg("bayes: Entering seen_get");

  # Force scalar context so that "not found" yields undef, not ().
  return scalar $self->_get(seen => $msgid);
}
# Record $flag for message id $msgid in the seen table.
# Returns 1; dies if the record cannot be written.
sub seen_put {
  my $self = shift;
  my ($msgid, $flag) = @_;

  dbg("bayes: Entering seen_put");

  my $status = $self->{handles}->{seen}->db_put($msgid, $flag);
  unless ($status == 0) {
    die "Couldn't put record: $BerkeleyDB::Error";
  }

  return 1;
}
# Remove a message id from the seen table.
# Returns 1 if a record was deleted, 0E0 (numeric zero) if there was no
# such record; dies on any other database status.
sub seen_delete {
  my $self = shift;
  my ($msgid) = @_;

  dbg("bayes: Entering seen_delete");

  my $status = $self->{handles}->{seen}->db_del($msgid);

  return 1 if $status == 0;
  return 0E0 if $status == DB_NOTFOUND;

  die "Couldn't delete record: $BerkeleyDB::Error";
}
# Fetch the bookkeeping values from the vars table, in a fixed order.
# Any value that is missing, false, or contains no digit is reported as 0.
# Returns the list of eleven values (LAST_JOURNAL_SYNC is read for both
# position 0 and position 7).
sub get_storage_variables {
  my $self = shift;

  dbg("bayes: get_storage_variables starting");

  my @names = qw{LAST_JOURNAL_SYNC NSPAM NHAM NTOKENS LAST_EXPIRE
                 OLDEST_TOKEN_AGE DB_VERSION LAST_JOURNAL_SYNC
                 LAST_ATIME_DELTA LAST_EXPIRE_REDUCE NEWEST_TOKEN_AGE};

  my @values = map {
    my $stored = $self->_get(vars => $_);
    ($stored && $stored =~ /\d+/) ? $stored : 0;
  } @names;

  dbg("bayes: get_storage_variables done");
  return @values;
}
# Alias for dump_tokens: forwards all arguments unchanged.
sub dump_db_toks {
  return dump_tokens(@_);
}
# Print every token in the tokens table using a printf template.
#
# Parameters:
#   $template - printf format; receives probability, spam count, ham
#               count, atime and the hex-encoded token, in that order
#   $regex    - optional pattern; when defined, only tokens matching it
#               are printed.  It is compiled through compile_regexp and
#               this sub dies if that fails
#   @vars     - storage variables; indexes 1 and 2 are handed to
#               _compute_prob_for_token
#
# Returns 1.  Dies on cursor errors or an invalid regex.
sub dump_tokens {
  my ($self, $template, $regex, @vars) = @_;

  dbg("bayes: dump_tokens starting");

  if (defined $regex) {
    my ($rec, $err) = compile_regexp($regex, 2);
    if (!$rec) {
      die "Invalid dump_tokens regex '$regex': $err\n";
    }
    $regex = $rec;
  }

  my $cursor = $self->{handles}->{tokens}->db_cursor;
  $cursor or die "Couldn't get cursor: $BerkeleyDB::Error";

  my ($token, $value) = ("", "");
  while ($cursor->c_get($token, $value, $next) == 0) {
    # No /o here: with /o the first pattern seen would be frozen for the
    # life of the process, so a later call with a different $regex would
    # silently filter on the stale one.
    next if defined $regex && $token !~ /$regex/;

    my ($ts, $th, $atime) = _unpack_token($value);

    # A false probability (including undef) is reported as neutral 0.5.
    my $prob = $self->{bayes}->_compute_prob_for_token(
                 $token, $vars[1], $vars[2], $ts, $th) || 0.5;

    # Tokens are binary; print them hex-encoded.
    my $encoded = unpack("H*", $token);
    printf $template, $prob, $ts, $th, $atime, $encoded;
  }

  $cursor->c_close == 0
    or die "Couldn't close cursor: $BerkeleyDB::Error";
  undef $cursor;

  dbg("bayes: dump_tokens done");
  return 1;
}
# Store $time as the LAST_EXPIRE variable.
# Returns 1; dies if the record cannot be written.
sub set_last_expire {
  my $self = shift;
  my ($time) = @_;

  dbg("bayes: Entering set_last_expire");

  my $status = $self->{handles}->{vars}->db_put(LAST_EXPIRE => $time);
  unless ($status == 0) {
    die "Couldn't put record: $BerkeleyDB::Error";
  }

  return 1;
}
# Read the RUNNING_EXPIRE marker from the vars table.
# Returns its value when it consists solely of digits, otherwise undef.
sub get_running_expire_tok {
  my $self = shift;

  dbg("bayes: Entering get_running_expire_tok");

  my $value = $self->_get(vars => "RUNNING_EXPIRE") || "";
  my $result = ($value =~ /^\d+$/) ? $value : undef;

  dbg("bayes: get_running_expire_tok exiting with %s",
      defined $result ? $result : 'UNDEF');
  return $result;
}
# Store the current time as the RUNNING_EXPIRE marker.
# Returns the time that was stored; dies if the write fails.
sub set_running_expire_tok {
  my $self = shift;

  my $time = time;
  my $status = $self->{handles}->{vars}->db_put(RUNNING_EXPIRE => $time);
  unless ($status == 0) {
    die "Couldn't put record: $BerkeleyDB::Error";
  }

  return $time;
}
# Delete the RUNNING_EXPIRE marker from the vars table.
# Returns 1 if it was removed, 0E0 (numeric zero) if it did not exist;
# dies on any other database status.
sub remove_running_expire_tok {
  my $self = shift;

  my $status = $self->{handles}->{vars}->db_del("RUNNING_EXPIRE");

  return 1 if $status == 0;
  return 0E0 if $status == DB_NOTFOUND;

  die "Couldn't delete record: $BerkeleyDB::Error";
}
# Look up a single token.
# Returns (spam count, ham count, atime) when found, an empty list
# otherwise.  Implemented on top of tok_get_all.
sub tok_get {
  my $self = shift;
  my ($token) = @_;

  dbg("bayes: Entering tok_get");

  my $found = $self->tok_get_all($token);

  if (@$found) {
    # Each entry is [token, spam, ham, atime]; drop the token itself.
    return (@{ $found->[0] })[1, 2, 3];
  }
  return ();
}
# Look up several tokens at once.
# Returns a reference to an array holding one
# [token, spam count, ham count, atime] entry per token that was found,
# in the order the keys were given.
sub tok_get_all {
  my ($self, @keys) = @_;

  # _mget yields one slot per key; undef marks a key that was not found.
  my @results = $self->_mget(tokens => \@keys);

  my @values;
  for my $idx (0 .. $#keys) {
    my $packed = $results[$idx];
    next unless defined $packed;
    push @values, [ $keys[$idx], _unpack_token($packed) ];
  }

  dbg("bayes: tok_get_all found %d tokens out of %d search keys",
      scalar(@values), scalar(@keys));

  return \@values;
}
# Adjust the spam/ham counts of one token.  Thin wrapper around
# multi_tok_count_change; returns whatever that call returns.
sub tok_count_change {
  my $self = shift;
  my ($dspam, $dham, $token, $newatime) = @_;

  dbg("bayes: Entering tok_count_change");

  my %single = ($token => 1);
  return $self->multi_tok_count_change($dspam, $dham, \%single, $newatime);
}
# Apply the same spam/ham count deltas to a set of tokens.
#
# Parameters:
#   $dspam, $dham - deltas to add to each token's counts (missing => 0);
#                   resulting counts are clamped at zero
#   $tokens       - hash reference whose keys are the tokens to update
#   $newatime     - atime to store with each updated token (missing => 0)
#
# Tokens that do not exist yet are created and NTOKENS is increased
# accordingly.  NEWEST_TOKEN_AGE / OLDEST_TOKEN_AGE are updated when
# $newatime extends the known range.
#
# Returns 1.  Dies on any database error.
sub multi_tok_count_change {
  my ($self, $dspam, $dham, $tokens, $newatime) = @_;

  $dspam ||= 0;
  $dham ||= 0;
  $newatime ||= 0;

  # Nothing to change at all.
  return 1 unless ($dspam or $dham);

  my $newtokens = 0;

  for my $token (keys %{$tokens}) {
    my $value;
    my $status = $self->{handles}->{tokens}->db_get($token => $value, $rmw);

    # Starting counts: the stored ones, or zero for a brand-new token.
    my ($spam, $ham) = (0, 0);
    if ($status == 0) {
      ($spam, $ham) = _unpack_token($value);
    }
    elsif ($status == DB_NOTFOUND) {
      $newtokens++;
    }
    else {
      die "Couldn't get record: $BerkeleyDB::Error";
    }

    $spam += $dspam;
    $spam = 0 if $spam < 0;
    $ham += $dham;
    $ham = 0 if $ham < 0;

    my $newvalue = _pack_token($spam, $ham, $newatime);
    $self->{handles}->{tokens}->db_put($token => $newvalue) == 0
      or die "Couldn't put record: $BerkeleyDB::Error";
  }

  if ($newtokens) {
    my $ntokens = $self->_get(vars => "NTOKENS", $rmw) || 0;
    $ntokens += $newtokens;
    $ntokens = 0 if $ntokens < 0;
    $self->{handles}->{vars}->db_put(NTOKENS => $ntokens) == 0
      or die "Couldn't put record: $BerkeleyDB::Error";
  }

  my $newmagic = $self->_get(vars => "NEWEST_TOKEN_AGE", $rmw) || 0;
  if ($newatime > $newmagic) {
    $self->{handles}->{vars}->db_put(NEWEST_TOKEN_AGE => $newatime) == 0
      or die "Couldn't put record: $BerkeleyDB::Error";
  }

  # With no stored oldest age, "now" is used as the comparison point.
  my $oldmagic = $self->_get(vars => "OLDEST_TOKEN_AGE", $rmw) || time;
  if ($newatime && $newatime < $oldmagic) {
    $self->{handles}->{vars}->db_put(OLDEST_TOKEN_AGE => $newatime) == 0
      or die "Couldn't put record: $BerkeleyDB::Error";
  }

  return 1;
}
# Return the (spam message count, ham message count) pair, taken from
# positions 1 and 2 of get_storage_variables.
sub nspam_nham_get {
  my $self = shift;

  dbg("bayes: Entering nspam_nham_get");

  my @vars = $self->get_storage_variables();
  return ($vars[1], $vars[2]);
}
# Add $ds to the stored spam message count and $dh to the stored ham
# message count.  Missing deltas and missing stored values count as 0 and
# results are clamped at zero.
# Returns 1; dies if a record cannot be written.
sub nspam_nham_change {
  my ($self, $ds, $dh) = @_;

  # NSPAM is processed (read and written) before NHAM.
  for my $update ([ "NSPAM", $ds ], [ "NHAM", $dh ]) {
    my ($name, $delta) = @$update;

    my $total = $self->_get(vars => $name, $rmw) || 0;
    $total += ($delta || 0);
    $total = 0 if $total < 0;

    $self->{handles}->{vars}->db_put($name => $total) == 0
      or die "Couldn't put record: $BerkeleyDB::Error";
  }

  return 1;
}
# Update the atime of a single token by delegating to tok_touch_all.
sub tok_touch {
  my $self = shift;
  my ($token, $atime) = @_;

  my @one_token = ($token);
  return $self->tok_touch_all(\@one_token, $atime);
}
# Set the atime of every token in @$tokens to $newatime, keeping the
# stored spam/ham counts.  Tokens that are not in the database are
# skipped silently.
# Returns 1; dies on any other database error.
sub tok_touch_all {
  my ($self, $tokens, $newatime) = @_;

  for my $token (@{$tokens}) {
    my $value;
    my $status = $self->{handles}->{tokens}->db_get($token => $value, $rmw);

    # Unknown token: nothing to touch.
    next if $status != 0 && $status == DB_NOTFOUND;

    die "Couldn't get record: $BerkeleyDB::Error" unless $status == 0;

    my ($spam, $ham) = _unpack_token($value);
    my $newvalue = _pack_token($spam, $ham, $newatime);

    $self->{handles}->{tokens}->db_put($token => $newvalue) == 0
      or die "Couldn't put record: $BerkeleyDB::Error";
  }

  return 1;
}
# Nothing to clean up for this backend: logs the call and returns 1.
sub cleanup {
  my $self = shift;

  dbg("Running cleanup");

  return 1;
}
# No journal to synchronise in this backend: logs the call and returns 1.
# $opts is accepted but not used.
sub sync {
  my $self = shift;
  my ($opts) = @_;

  dbg("Running sync");

  return 1;
}
# No upgrade steps are performed by this backend: logs the call and
# returns 1.
sub perform_upgrade {
  dbg("bayes: Entering perform_upgrade");

  return 1;
}
# Remove the on-disk database.
#
# Unties the database, then attempts to remove the configured bayes_path.
# Removal is best effort: a failure does not change the return value, but
# it is now reported through dbg instead of being discarded silently.
#
# Always returns 1.
sub clear_database {
  my ($self) = @_;

  dbg("bayes: Entering clear_database");

  $self->untie_db();
  dbg("bayes: removing db.");

  my $main = $self->{bayes}->{main};
  my $path = $main->sed_path($main->{conf}->{bayes_path});

  # NOTE(review): File::Path documents mkpath/rmtree (and make_path/
  # remove_tree) but no rmpath; confirm rmpath is actually defined,
  # otherwise this call fails every time and nothing is removed.
  # NOTE(review): _open_db uses dirname() of this path as the environment
  # home; here the path is used without dirname() - confirm intended.
  my $removed = eval { rmpath($path); 1 };
  unless ($removed) {
    my $err = defined $@ && $@ ne '' ? $@ : 'unknown error';
    dbg("bayes: could not remove $path: $err");
  }

  return 1;
}
# Write a textual dump of the database to the currently selected output
# handle.
#
# Output format (tab separated):
#   v <value> db_version      - must be the first line
#   v <value> num_spam
#   v <value> num_nonspam
#   t <spam> <ham> <atime> <hex-encoded token>
#   s <flag> <msgid>
#
# Returns 0 if the database cannot be tied, 1 otherwise.  Dies on cursor
# errors.
sub backup_database {
  my ($self) = @_;

  dbg("bayes: Entering backup_database");

  return 0 unless $self->tie_db_writable;

  my @vars = $self->get_storage_variables;

  print "v\t$vars[6]\tdb_version # this must be the first line!!!\n";
  print "v\t$vars[1]\tnum_spam\n";
  print "v\t$vars[2]\tnum_nonspam\n";

  my $tokens = $self->{handles}->{tokens}->db_cursor;
  $tokens or die "Couldn't get cursor: $BerkeleyDB::Error";

  my ($token, $value) = ("", "");
  while ($tokens->c_get($token, $value, $next) == 0) {
    my ($ts, $th, $atime) = _unpack_token($value);
    # Tokens are binary; dump them hex-encoded.
    my $encoded = unpack("H*", $token);
    print "t\t$ts\t$th\t$atime\t$encoded\n";
  }

  $tokens->c_close == 0
    or die "Couldn't close cursor: $BerkeleyDB::Error";
  undef $tokens;

  my $seen = $self->{handles}->{seen}->db_cursor;
  $seen or die "Couldn't get cursor: $BerkeleyDB::Error";

  # The seen table is keyed by message id with the flag as its value
  # (see seen_put).  restore_database parses seen lines as
  # "s <flag> <msgid>" and rejects any flag other than 'h' or 's', so the
  # flag has to be written before the message id for a dump to restore.
  my ($msgid, $flag) = ("", $value);
  while ($seen->c_get($msgid, $flag, $next) == 0) {
    print "s\t$flag\t$msgid\n";
  }

  $seen->c_close == 0
    or die "Couldn't close cursor: $BerkeleyDB::Error";
  undef $seen;

  $self->untie_db();

  return 1;
}
# Rebuild the database from a dump file in the format written by
# backup_database.
#
# Parameters:
#   $filename - path of the dump file to read
#   $showdots - when true, print a "." to STDERR every 1000 input lines
#               and a final newline
#
# The existing database is cleared first.  INT, TERM and (except on
# Windows) HUP are ignored for the duration of the call.
#
# Returns 1 on success, 0 on failure.  Dies on read/close errors of the
# dump file and on database write errors raised by the helpers.
sub restore_database {
  my ($self, $filename, $showdots) = @_;

  dbg("bayes: Entering restore_database");

  local *DUMPFILE;
  unless (open(DUMPFILE, '<', $filename)) {
    dbg("bayes: unable to open backup file $filename: $!");
    return 0;
  }

  # Don't let an interrupt leave a half-restored database behind.
  local $SIG{'INT'} = 'IGNORE';
  local $SIG{'HUP'} = 'IGNORE'
    if !Mail::SpamAssassin::Util::am_running_on_windows();
  local $SIG{'TERM'} = 'IGNORE';

  return 0 unless $self->clear_database();

  # Drop the current tie and reopen for writing.
  $self->untie_db();

  return 0 unless $self->tie_db_writable();

  my $token_count = 0;
  my $db_version;
  my $num_spam;
  my $num_ham;
  my $error_p = 0;
  my $line_count = 0;

  # The very first line must carry the database version.
  my $line = <DUMPFILE>;
  defined $line or die "Error reading dump file: $!";
  $line_count++;

  if ($line =~ m/^v\s+(\d+)\s+db_version/) {
    $db_version = $1;
  }
  else {
    dbg("bayes: database version must be the first line in the backup file, correct and re-run");
    return 0;
  }

  unless ($db_version == 2 || $db_version == 3) {
    warn("bayes: database version $db_version is unsupported, must be version 2 or 3");
    return 0;
  }

  my $token_error_count = 0;
  my $seen_error_count = 0;

  # $! is reset before every read so that a read error can be told apart
  # from end-of-file once the loop has finished.
  for ($!=0; defined($line=<DUMPFILE>); $!=0) {
    chomp($line);
    $line_count++;

    if ($showdots && $line_count % 1000 == 0) {
      print STDERR ".";
    }

    if ($line =~ /^v\s+/) {             # variable line
      my (undef, $raw_value, $label) = split(/\s+/, $line, 3);
      my $value = $raw_value + 0;

      if ($label eq 'num_spam') {
        $num_spam = $value;
      }
      elsif ($label eq 'num_nonspam') {
        $num_ham = $value;
      }
      else {
        dbg("bayes: restore_database: skipping unknown line: $line");
      }
    }
    elsif ($line =~ /^t\s+/) {          # token line
      my @fields = split(/\s+/, $line, 5);
      my $spam_count = $fields[1] + 0;
      my $ham_count  = $fields[2] + 0;
      my $atime      = $fields[3] + 0;
      my $token      = $fields[4];

      my @warnings;

      if ($spam_count < 0) {
        $spam_count = 0;
        push(@warnings, 'spam count < 0, resetting');
      }
      if ($ham_count < 0) {
        $ham_count = 0;
        push(@warnings, 'ham count < 0, resetting');
      }

      if ($spam_count == 0 && $ham_count == 0) {
        dbg("bayes: token has zero spam and ham count, skipping");
        next;
      }

      if ($atime > time()) {
        $atime = time();
        push(@warnings, 'atime > current time, resetting');
      }

      if (@warnings) {
        dbg("bayes: token (%s) has the following warnings:\n%s",
            $token, join("\n", @warnings));
      }

      if ($db_version < 3) {
        # Older dumps hold the token text; store the last five bytes of
        # its SHA1 digest instead.
        $token = substr(sha1($token), -5);
      }
      else {
        # Version 3 dumps hold the token hex-encoded.
        $token = pack("H*", $token);
      }

      unless ($self->_put_token($token, $spam_count, $ham_count, $atime)) {
        dbg("bayes: error inserting token for line: $line");
        $token_error_count++;
      }
      $token_count++;
    }
    elsif ($line =~ /^s\s+/) {          # seen line
      my (undef, $flag, $msgid) = split(/\s+/, $line, 3);

      unless ($flag eq 'h' || $flag eq 's') {
        dbg("bayes: unknown seen flag ($flag) for line: $line, skipping");
        next;
      }

      unless ($msgid) {
        dbg("bayes: blank msgid for line: $line, skipping");
        next;
      }

      unless ($self->seen_put($msgid, $flag)) {
        dbg("bayes: error inserting msgid in seen table for line: $line");
        $seen_error_count++;
      }
    }
    else {
      dbg("bayes: skipping unknown line: $line");
      next;
    }

    # Give up (and wipe what was loaded) once too many lines have failed.
    if ($token_error_count >= 20) {
      warn "bayes: encountered too many errors (20) while parsing token line, reverting to empty database and exiting\n";
      $self->clear_database();
      return 0;
    }

    if ($seen_error_count >= 20) {
      warn "bayes: encountered too many errors (20) while parsing seen lines, reverting to empty database and exiting\n";
      $self->clear_database();
      return 0;
    }
  }

  # The loop ended: plain EOF leaves $! at 0, anything else is an error.
  unless (defined $line || $! == 0) {
    if ($! == EBADF) {
      dbg("bayes: error reading dump file: $!");
    }
    else {
      die "error reading dump file: $!";
    }
  }
  close(DUMPFILE) or die "Can't close dump file: $!";

  print STDERR "\n" if $showdots;

  unless (defined($num_spam)) {
    dbg("bayes: unable to find num spam, please check file");
    $error_p = 1;
  }

  unless (defined($num_ham)) {
    dbg("bayes: unable to find num ham, please check file");
    $error_p = 1;
  }

  if ($error_p) {
    dbg("bayes: error(s) while attempting to load $filename, clearing database, correct and re-run");
    $self->clear_database();
    return 0;
  }

  if (($num_spam || $num_ham)
      && !$self->nspam_nham_change($num_spam, $num_ham)) {
    dbg("bayes: error updating num spam and num ham, clearing database");
    $self->clear_database();
    return 0;
  }

  dbg("bayes: parsed $line_count lines");
  dbg("bayes: created database with $token_count tokens based on $num_spam spam messages and $num_ham ham messages");

  $self->untie_db();

  return 1;
}
# True when the database is both physically open and officially open.
sub db_readable {
  my $self = shift;

  my ($really, $officially) =
    @{$self}{qw(is_really_open is_officially_open)};

  return $really && $officially;
}
# True when the database is physically open, officially open and was
# opened in writable mode.
sub db_writable {
  my $self = shift;

  dbg("bayes: Entering db_writable");

  my ($really, $officially, $writable) =
    @{$self}{qw(is_really_open is_officially_open is_writable)};

  return $really && $officially && $writable;
}
# Secondary-key callback passed to associate() in _open_db.
# Receives (token, packed value, output slot); writes the atime unpacked
# from the value into the third argument through the @_ alias and
# returns 0.
sub _extract_atime {
  # _unpack_token returns (spam, ham, atime); only the atime is needed.
  $_[2] = (_unpack_token($_[1]))[2];

  return 0;
}
# Store a token with absolute counts (as opposed to applying deltas).
#
# Parameters:
#   $token      - token key
#   $ts, $th    - spam and ham counts (false values become 0)
#   $atime      - access time to store
#
# When both counts are zero an existing token is deleted (and NTOKENS
# decremented); a non-existing one makes the sub return nothing.
# Otherwise the token is written, NTOKENS is incremented for a new token,
# and NEWEST_TOKEN_AGE / OLDEST_TOKEN_AGE are extended as needed.
#
# Returns 1 after a write or delete.  Dies on database errors.
sub _put_token {
  my ($self, $token, $ts, $th, $atime) = @_;

  dbg("bayes: Entering _put_token");

  $ts ||= 0;
  $th ||= 0;

  dbg("bayes: $token has spam $ts, ham $th, atime $atime");

  my $stored = $self->_get(tokens => $token, $rmw);
  my $exists_already = defined($stored) ? 1 : 0;

  dbg("bayes: $token exists: $exists_already");

  if ($ts == 0 && $th == 0) {
    # Zero counts mean "remove"; nothing to do for an unknown token.
    return unless $exists_already;

    my $ntokens = $self->_get(vars => "NTOKENS", $rmw);
    $self->{handles}->{vars}->db_put(NTOKENS => --$ntokens) == 0
      or die "Couldn't put record: $BerkeleyDB::Error";
    dbg("bayes: ntokens is $ntokens");

    my $status = $self->{handles}->{tokens}->db_del($token);
    unless ($status == 0 || $status == DB_NOTFOUND) {
      die "Couldn't delete record: $BerkeleyDB::Error";
    }
    dbg("bayes: $token deleted");
  }
  else {
    if (!$exists_already) {
      # A new token: bump the token counter first.
      my $ntokens = $self->_get(vars => "NTOKENS", $rmw);
      $self->{handles}->{vars}->db_put(NTOKENS => ++$ntokens) == 0
        or die "Couldn't put record: $BerkeleyDB::Error";
      dbg("bayes: ntokens is $ntokens");
    }

    my $newmagic = $self->_get(vars => "NEWEST_TOKEN_AGE", $rmw) || 0;
    dbg("bayes: NEWEST_TOKEN_AGE is $newmagic");

    if ($atime > $newmagic) {
      dbg("bayes: Updating NEWEST_TOKEN_AGE");
      $self->{handles}->{vars}->db_put(NEWEST_TOKEN_AGE => $atime) == 0
        or die "Couldn't put record: $BerkeleyDB::Error";
    }

    # With no stored oldest age, "now" is used as the comparison point.
    my $oldmagic = $self->_get(vars => "OLDEST_TOKEN_AGE", $rmw) || time;
    dbg("bayes: OLDEST_TOKEN_AGE is $oldmagic");

    if ($atime && $atime < $oldmagic) {
      dbg("bayes: Updating OLDEST_TOKEN_AGE to $atime");
      $self->{handles}->{vars}->db_put(OLDEST_TOKEN_AGE => $atime) == 0
        or die "Couldn't put record: $BerkeleyDB::Error";
    }

    my $packed = _pack_token($ts, $th, $atime);

    dbg("bayes: Setting $token to $packed");
    dbg("bayes: Handle is $self->{handles}->{tokens}");

    $self->{handles}->{tokens}->db_put($token, $packed) == 0
      or die "Couldn't put record: $BerkeleyDB::Error";
  }

  dbg("bayes: Leaving _put_token");
  return 1;
}
# Decode a packed token value into (spam count, ham count, atime).
#
# The first byte selects the layout:
#   ONE_BYTE_FORMAT  - both counts live in that byte, followed by the
#                      atime as one 32-bit little-endian value
#   TWO_LONGS_FORMAT - the byte is followed by spam, ham and atime as
#                      three 32-bit little-endian values
# Any other layout produces a warning and (0, 0, 0).
sub _unpack_token {
  my $value = shift || 0;

  my ($packed, $first, $second, $third) = unpack("CVVV", $value);
  my $format = $packed & FORMAT_FLAG;

  if ($format == ONE_BYTE_FORMAT) {
    my $spam = ($packed & ONE_BYTE_SSS_BITS) >> 3;
    my $ham = $packed & ONE_BYTE_HHH_BITS;
    # In this layout the first long after the byte is the atime.
    return ($spam, $ham, $first || 0);
  }

  if ($format == TWO_LONGS_FORMAT) {
    return ($first || 0, $second || 0, $third || 0);
  }

  warn "bayes: unknown packing format for bayes db, please re-learn: $packed";
  return (0, 0, 0);
}
# Encode (spam count, ham count, atime) into the packed on-disk form.
# Missing or false arguments are treated as 0.
#
# When both counts are below 8 the compact layout is used: one byte
# carrying both counts plus the atime.  Otherwise a format byte is
# followed by spam, ham and atime as three 32-bit little-endian values.
sub _pack_token {
  my ($ts, $th, $atime) = @_;

  $ts ||= 0;
  $th ||= 0;
  $atime ||= 0;

  my $fits_one_byte = ($ts < 8 && $th < 8);

  unless ($fits_one_byte) {
    return pack("CVVV", TWO_LONGS_FORMAT, $ts, $th, $atime);
  }

  return pack("CV", (ONE_BYTE_FORMAT | ($ts << 3) | $th) & 255, $atime);
}
# Fetch one record from the named table.
#
# Parameters:
#   $table - key into $self->{handles} (e.g. vars, tokens, seen)
#   $key   - record key
#   $flags - optional db_get flags (defaults to 0)
#
# Returns the stored value, or nothing (undef / empty list) when the key
# does not exist.  Dies on any other database status.
sub _get {
  my ($self, $table, $key, $flags) = @_;

  $flags |= 0;

  my $value = "";
  my $status = $self->{handles}->{$table}->db_get($key => $value, $flags);

  return $value if $status == 0;
  return if $status == DB_NOTFOUND;

  die "Couldn't get record: $BerkeleyDB::Error";
}
# Fetch several records from the named table.
#
# Parameters:
#   $table - key into $self->{handles}
#   $keys  - array reference of record keys
#   $flags - optional db_get flags (defaults to 0)
#
# Returns a list with one element per key, in order: the stored value, or
# undef for a key that does not exist.  Dies on any other database status.
sub _mget {
  my ($self, $table, $keys, $flags) = @_;

  my @results;

  $flags |= 0;
  my $handle = $self->{handles}->{$table};

  for my $key (@$keys) {
    my $value = "";
    my $status = $handle->db_get($key => $value, $flags);

    if ($status == 0) {
      push @results, $value;
    }
    elsif ($status == DB_NOTFOUND) {
      push @results, undef;
    }
    else {
      die "Couldn't get record: $BerkeleyDB::Error";
    }
  }

  return @results;
}
# Forward to the main SpamAssassin sa_die with the arguments unchanged.
sub sa_die {
  return Mail::SpamAssassin::sa_die(@_);
}

# A module must end with a true value.
1;