use
Errno
qw(ENOENT EACCES EBADF)
;
# Package-level state shared by the subs in this file.
our
(
# run() stores the number of indexed messages here before processing them.
$MESSAGES
,
# Current cache object: assigned by _create_cache() when opt_cache is set,
# and replaced by the return value of ->finish() once a scan is done.
$AICache
,
# Cleared at the start of _scan_targets(), then filled with
# class letter => hashref of that target's opt_* settings.
%class_opts
);
# This package declares no base classes.
our
@ISA
=
qw()
;
# new($class_or_object, \%options)
#
# Constructor.  The optional hashref is blessed in place (not copied) and
# becomes the iterator object; an empty hash is used when none is given.
# May be called on a class name or on an existing object.
#
# Fields initialised here:
#   determine_receive_date - true when opt_after or opt_before is defined,
#                            or when opt_want_date is true
#   s, h                   - empty per-class message index lists
#   opt_max_size           - 0 when opt_all is set, otherwise BIG_BYTES
#                            unless the caller supplied a value
#
# Returns the blessed object.
sub new {
    my ($proto, $args) = @_;

    my $class = ref($proto) || $proto;
    my $self  = bless(defined $args ? $args : {}, $class);

    # Message dates are only worked out when some option needs them.
    $self->{determine_receive_date} =
           defined($self->{opt_after})
        || defined($self->{opt_before})
        || $self->{opt_want_date};

    $self->{s} = [];
    $self->{h} = [];

    if ($self->{opt_all}) {
        # opt_all overrides any size limit the caller passed in.
        $self->{opt_max_size} = 0;
    }
    else {
        $self->{opt_max_size} = BIG_BYTES
            unless defined $self->{opt_max_size};
    }

    return $self;
}
# set_functions($wanted_sub, $result_sub)
#
# Registers the two callbacks used while running: wanted_sub is invoked for
# every message, result_sub for every message that produced a true result.
# An undefined argument leaves the corresponding callback untouched.
sub set_functions {
    my ($self, $wanted, $result) = @_;

    if (defined $wanted) { $self->{wanted_sub} = $wanted }
    if (defined $result) { $self->{result_sub} = $result }
}
# run(@targets)
#
# Scans every target, builds the list of packed message index entries
# (class 's' entries first, then class 'h') and processes them via _run().
#
# Warns and returns 0 when set_functions() has not supplied a wanted_sub;
# otherwise returns whatever _run() returns.
sub run {
    my ($self, @targets) = @_;

    unless (defined $self->{wanted_sub}) {
        warn "archive-iterator: set_functions never called";
        return 0;
    }

    # Locate the external decompressors once; _mail_open() reads the
    # resulting "<tool>_path" entries.
    for my $tool (qw(bzip2 xz lz4 lzip lzop)) {
        $self->{ $tool . '_path' } =
            Mail::SpamAssassin::Util::find_executable_in_env_path($tool);
    }

    # Callback handed to the scanners: file each message under its class.
    my $collect = sub {
        my ($iter, $date, $class, $format, $mail) = @_;
        push(@{ $iter->{$class} },
             _index_pack($date, $class, $format, $mail));
    };
    $self->_scan_targets(\@targets, $collect);

    # Merge both class lists into one and drop the object's references.
    my $messages = $self->{s};
    undef $self->{s};
    push(@{$messages}, @{ $self->{h} });
    undef $self->{h};

    $MESSAGES = scalar(@{$messages});

    return $self->_run($messages);
}
# _run(\@messages)
#
# Consumes the packed index entries one by one (the array is emptied),
# runs each through _run_message() and reports every true result to
# result_sub as ($class, $result, $date).
#
# Returns true when at least one message produced a result.
sub _run {
    my ($self, $messages) = @_;

    my $processed = 0;

    while (my $packed = shift @{$messages}) {
        my ($class, undef, $date, undef, $result) =
            $self->_run_message($packed);

        next unless $result;

        $processed++;
        $self->{result_sub}->($class, $result, $date);
    }

    return $processed > 0;
}
# _run_message($packed_index_entry)
#
# Unpacks one index entry and dispatches on its format letter:
#   'f' -> _run_file, 'm' -> _run_mailbox, 'b' -> _run_mbx
# Returns whatever the selected handler returns.
sub _run_message {
    my ($self, $packed) = @_;

    my ($date, $class, $format, $mail) = _index_unpack($packed);
    my @handler_args = ($class, $format, $mail, $date);

    return $self->_run_file(@handler_args)    if $format eq 'f';
    return $self->_run_mailbox(@handler_args) if $format eq 'm';
    return $self->_run_mbx(@handler_args)     if $format eq 'b';
}
# _run_file($class, $format, $where, $date)
#
# Reads the single-message file $where and passes it to wanted_sub as
# ($class, $where, $date, \@lines, $format).
#
# Returns ($class, $format, $date, $where, <wanted_sub result>), or nothing
# when the file cannot be opened or exceeds opt_max_size.  Dies on read or
# close errors.
#
# Fix: the receive date used to be computed with splice(), which removed
# the header lines from @msg before wanted_sub saw the message.  The header
# is now copied with a slice, so the message stays complete.  When the
# message has no blank separator line, all lines are treated as header.
sub _run_file {
    my ($self, $class, $format, $where, $date) = @_;

    my $fh = $self->_mail_open($where, 1);
    return unless $fh;

    my $opt_max_size = $self->{opt_max_size};

    # The "_" filetests reuse the most recent stat buffer instead of
    # stat'ing $where again (presumably the one filled by the -f test in
    # _mail_open -- confirm before reordering anything here).
    if (!$opt_max_size) {
        # no size limit configured
    }
    elsif (!-f _) {
        # not a plain file; its size is only known once it has been read
    }
    elsif (-s _ > $opt_max_size) {
        info("archive-iterator: skipping large message: ".
             "file size %d, limit %d bytes", -s _, $opt_max_size);
        close $fh  or die "error closing input file: $!";
        return;
    }

    my @msg;
    my $header;        # index of the first blank line, if any
    my $len = 0;
    my $str = '';
    my ($inbuf, $nread);

    # Read in chunks so an oversized stream can be abandoned early.
    while ( $nread = read($fh, $inbuf, 16384) ) {
        $len += $nread;
        if ($opt_max_size && $len > $opt_max_size) {
            info("archive-iterator: skipping large message: read %d, limit %d bytes",
                 $len, $opt_max_size);
            close $fh  or die "error closing input file: $!";
            return;
        }
        $str .= $inbuf;
    }
    defined $nread  or die "error reading: $!";
    undef $inbuf;

    # Split into lines, keeping the line terminators.
    @msg = split(/^/m, $str, -1);
    undef $str;

    for my $j (0 .. $#msg) {
        if ($msg[$j] =~ /^\015?$/) {
            $header = $j;
            last;
        }
    }
    close $fh  or die "error closing input file: $!";

    if ($date == AI_TIME_UNKNOWN && $self->{determine_receive_date}) {
        # Copy (do not remove) the header lines preceding the blank line.
        my $last_header_line = defined $header ? $header - 1 : $#msg;
        $date = Mail::SpamAssassin::Util::receive_date(
                    join('', @msg[0 .. $last_header_line]));
    }

    return ($class, $format, $date, $where,
            $self->{wanted_sub}->($class, $where, $date, \@msg, $format));
}
# _run_mailbox($class, $format, $where, $date)
#
# Reads one message from an mbox file.  $where has the form
# "<file>.<byte offset>"; reading starts at the offset and stops at the
# next "From " line that also matches opt_from_regex.
#
# Returns ($class, $format, $date, $where, <wanted_sub result>), or nothing
# when the file cannot be opened or the message exceeds opt_max_size.
# Dies on seek, read or close errors (an EBADF read error is only logged).
#
# Fix: the receive date used to be computed with splice(), which removed
# the header lines from @msg before wanted_sub saw the message.  The header
# is now copied with a slice, so the message stays complete.  When the
# message has no blank separator line, all lines are treated as header.
sub _run_mailbox {
    my ($self, $class, $format, $where, $date) = @_;

    my ($file, $offset);
    {
        local ($1,$2);
        ($file, $offset) = ($where =~ m/(.*)\.(\d+)$/);
    }

    my @msg;
    my $header;        # index of the first blank line, if any
    my $fh = $self->_mail_open($file, 1);
    return unless $fh;

    my $opt_max_size = $self->{opt_max_size};
    dbg("archive-iterator: _run_mailbox %s, ofs %d, limit %d",
        $file, $offset, $opt_max_size||0);

    seek($fh,$offset,0)  or die "cannot reposition file to $offset: $!";

    my $size = 0;
    # $! is cleared before every read so a failure can be told from EOF.
    for ($!=0; <$fh>; $!=0) {
        # The first line read is this message's own "From " line (@msg is
        # still empty); a later one starts the next message.
        last if (/^From / && @msg && $_ =~ $self->{opt_from_regex});
        $size += length($_);
        push(@msg, $_);

        if ($opt_max_size && $size > $opt_max_size) {
            info("archive-iterator: skipping large message: ".
                 "%d lines, %d bytes, limit %d bytes",
                 scalar @msg, $size, $opt_max_size);
            close $fh  or die "error closing input file: $!";
            return;
        }

        if (!defined $header && /^\s*$/) {
            $header = $#msg;
        }
    }
    defined $_ || $!==0  or
        $!==EBADF ? dbg("archive-iterator: error reading: $!")
                  : die "error reading: $!";
    close $fh  or die "error closing input file: $!";

    if ($date == AI_TIME_UNKNOWN && $self->{determine_receive_date}) {
        # Copy (do not remove) the header lines preceding the blank line.
        my $last_header_line = defined $header ? $header - 1 : $#msg;
        $date = Mail::SpamAssassin::Util::receive_date(
                    join('', @msg[0 .. $last_header_line]));
    }

    return ($class, $format, $date, $where,
            $self->{wanted_sub}->($class, $where, $date, \@msg, $format));
}
# _run_mbx($class, $format, $where, $date)
#
# Reads one message from an mbx file.  $where has the form
# "<file>.<byte offset>"; reading starts at the offset and stops at the
# next line matching MBX_SEPARATOR.
#
# Returns ($class, $format, $date, $where, <wanted_sub result>), or nothing
# when the file cannot be opened or the message exceeds opt_max_size.
# Dies on seek, read or close errors (an EBADF read error is only logged).
#
# Fix: the receive date used to be computed with splice(), which removed
# the header lines from @msg before wanted_sub saw the message.  The header
# is now copied with a slice, so the message stays complete.  When the
# message has no blank separator line, all lines are treated as header.
sub _run_mbx {
    my ($self, $class, $format, $where, $date) = @_;

    my ($file, $offset);
    {
        local ($1,$2);
        ($file, $offset) = ($where =~ m/(.*)\.(\d+)$/);
    }

    my @msg;
    my $header;        # index of the first blank line, if any
    my $fh = $self->_mail_open($file, 1);
    return unless $fh;

    my $opt_max_size = $self->{opt_max_size};
    dbg("archive-iterator: _run_mbx %s, ofs %d, limit %d",
        $file, $offset, $opt_max_size||0);

    seek($fh,$offset,0)  or die "cannot reposition file to $offset: $!";

    my $size = 0;
    # $! is cleared before every read so a failure can be told from EOF.
    for ($!=0; <$fh>; $!=0) {
        last if ($_ =~ MBX_SEPARATOR);
        $size += length($_);
        push(@msg, $_);

        if ($opt_max_size && $size > $opt_max_size) {
            info("archive-iterator: skipping large message: ".
                 "%d lines, %d bytes, limit %d bytes",
                 scalar @msg, $size, $opt_max_size);
            close $fh  or die "error closing input file: $!";
            return;
        }

        if (!defined $header && /^\s*$/) {
            $header = $#msg;
        }
    }
    defined $_ || $!==0  or
        $!==EBADF ? dbg("archive-iterator: error reading: $!")
                  : die "error reading: $!";
    close $fh  or die "error closing input file: $!";

    if ($date == AI_TIME_UNKNOWN && $self->{determine_receive_date}) {
        # Copy (do not remove) the header lines preceding the blank line.
        my $last_header_line = defined $header ? $header - 1 : $#msg;
        $date = Mail::SpamAssassin::Util::receive_date(
                    join('', @msg[0 .. $last_header_line]));
    }

    return ($class, $format, $date, $where,
            $self->{wanted_sub}->($class, $where, $date, \@msg, $format));
}
# _scan_targets(\@targets, $bkfunc)
#
# Walks the target list and calls the matching scanner for each location.
# A target is either a "class:format:location" string or a hashref with a
# {target} string plus opt_* keys.  The opt_* keys are copied onto $self
# and recorded in %class_opts under the class letter.
#
# Formats: dir, mbox, file, mbx, or detect (directory -> dir, a name
# containing ".mbox" -> mbox, anything else -> file).
#
# $bkfunc is passed through to the scanner untouched.  Invalid targets
# produce a warning and are skipped.
#
# Fixes: the target string is now held in a local copy, where the old code
# assigned through the foreach alias and so overwrote hashref entries in
# the caller's array.  A hashref without a defined {target} is reported
# and skipped instead of being fed to split().
sub _scan_targets {
    my ($self, $targets, $bkfunc) = @_;

    %class_opts = ();

    foreach my $entry (@{$targets}) {
        if (!defined $entry) {
            warn "archive-iterator: invalid (undef) value in target list";
            next;
        }

        my %opts;
        my $target = $entry;    # private copy; never write back to $entry
        if (ref $entry eq 'HASH') {
            # e.g. { target => $target, opt_foo => 1, opt_bar => 0.4 ... }
            foreach my $k (keys %{$entry}) {
                $opts{$k} = $entry->{$k}  if $k =~ /^opt_/;
            }
            $target = $entry->{target};
            if (!defined $target) {
                warn "archive-iterator: invalid (undef) value in target list";
                next;
            }
        }

        my ($class, $format, $rawloc) = split(/:/, $target, 3);

        if (!defined $format) {
            warn "archive-iterator: invalid (undef) format in target list, $target";
            next;
        }
        if (!defined $rawloc) {
            warn "archive-iterator: invalid (undef) raw location in target list, $target";
            next;
        }
        if ($rawloc eq '-') {
            warn 'archive-iterator: raw location "-" is not supported';
            next;
        }

        # Only the first letter of the class matters; default is 'h'.
        $class = substr($class, 0, 1) || 'h';

        # Record the per-class options and apply them to this object.
        $class_opts{$class} = \%opts;
        foreach my $k (keys %opts) {
            $self->{$k} = $opts{$k};
        }
        $self->_set_default_message_selection_opts();

        my @locations = $self->_fix_globs($rawloc);

        foreach my $location (@locations) {
            my $method;
            my $thisformat = $format;

            if ($format eq 'detect') {
                # Work out the format from the location itself.
                my $stat_errn = stat($location) ? 0 : 0+$!;
                if ($stat_errn != 0) {
                    warn "archive-iterator: no access to $location: $!\n";
                    next;
                }
                elsif (-d _) {
                    $thisformat = 'dir';
                }
                elsif ($location =~ /\.mbox/i) {
                    $thisformat = 'mbox';
                }
                else {
                    $thisformat = 'file';
                }
            }

            if ($thisformat eq 'dir') {
                $method = \&_scan_directory;
            }
            elsif ($thisformat eq 'mbox') {
                $method = \&_scan_mailbox;
            }
            elsif ($thisformat eq 'file') {
                $method = \&_scan_file;
            }
            elsif ($thisformat eq 'mbx') {
                $method = \&_scan_mbx;
            }
            else {
                warn "archive-iterator: format $thisformat (from $format) unknown!";
                next;
            }

            $method->($self, $class, $location, $bkfunc);
        }
    }
}
# _mail_open($file, $ignore_missing)
#
# Opens $file for binary reading and returns the handle.  For plain files
# the first bytes are inspected: gzip data is reopened through IO::Zlib,
# and bzip2 / xz / lz4 / lzip / lzo data through a pipe from the external
# tool whose path run() stored in $self->{<tool>_path}.  Anything else is
# rewound to the start.
#
# Returns nothing (after a warning, or a debug message when
# $ignore_missing is true and the file does not exist) if the file, the
# IO::Zlib module or the required executable is unavailable.  Dies on
# binmode, close or seek failures.
#
# Fixes: IO::Zlib is now actually loaded with eval/require before $@ is
# examined (the old code tested a stale $@ and never loaded the module);
# the rewind after sniffing the magic bytes is checked; the five identical
# external-decompressor branches are driven from one table.
sub _mail_open {
    my ($self, $file, $ignore_missing) = @_;

    my $fh;
    if (!open($fh, '<', $file)) {
        if ($ignore_missing && $! == ENOENT) {
            dbg("archive-iterator: no access to $file: $!");
        }
        else {
            warn "archive-iterator: no access to $file: $!\n";
        }
        return;
    }
    binmode $fh  or die "cannot set input file to binmode: $!";

    # Sniff the leading bytes of plain files only.  Note that the -f test
    # also fills the "_" stat buffer that _run_file consults afterwards.
    my $magic;
    if (-f $file && read($fh, $magic, 6)) {

        if ($magic =~ /^\x1F\x8B/) {
            dbg("archive-iterator: detected gzip file $file, reopening with IO::Zlib");
            close $fh  or die "error closing input file: $!";

            if (!eval { require IO::Zlib; 1 }) {
                warn "archive-iterator: IO::Zlib required for $file: $@\n";
                return;
            }

            $fh = IO::Zlib->new($file, "rb");
            if (!$fh) {
                if ($ignore_missing && $! == ENOENT) {
                    dbg("archive-iterator: no access to $file: $!");
                }
                else {
                    warn "archive-iterator: no access to $file: $!\n";
                }
                return;
            }
        }
        else {
            # [ magic bytes, format name for the log, executable name ]
            my @external = (
                [ qr/^\x42\x5A(?:\x68|\x30)/,    'bzip2', 'bzip2' ],
                [ qr/^\xFD\x37\x7A\x58\x5A\x00/, 'xz',    'xz'    ],
                [ qr/^\x04\x22\x4D\x18/,         'lz4',   'lz4'   ],
                [ qr/^\x4C\x5A\x49\x50/,         'lzip',  'lzip'  ],
                [ qr/^\x89\x4C\x5A\x4F\x00\x0D/, 'lzo',   'lzop'  ],
            );

            my $reopened = 0;
            foreach my $spec (@external) {
                my ($magic_re, $label, $exe) = @{$spec};
                next unless $magic =~ $magic_re;

                dbg("archive-iterator: detected $label file $file, reopening with $exe");
                close $fh  or die "error closing input file: $!";

                my $exe_path = $self->{ $exe . '_path' };
                if (!$exe_path) {
                    warn "archive-iterator: $exe executable required for $file\n";
                    return;
                }
                # List-form pipe open: no shell is involved.
                if (!open($fh, '-|', $exe_path, '-cd', $file)) {
                    warn "archive-iterator: no access to $file: $!\n";
                    return;
                }
                binmode $fh  or die "cannot set input file to binmode: $!";

                $reopened = 1;
                last;
            }

            if (!$reopened) {
                # Not compressed: hand the sniffed bytes back to the reader.
                seek($fh,0,0)  or die "cannot reposition file to 0: $!";
            }
        }
    }

    return $fh;
}
# _set_default_message_selection_opts()
#
# Fills in message-selection options that are still undefined:
#   opt_scanprob   => 1.0
#   opt_want_date  => 1
#   opt_cache      => 0
#   opt_from_regex => built-in pattern for mbox "From " separator lines
# A caller-supplied opt_from_regex that is not already a compiled regexp
# is compiled with compile_regexp(); an invalid pattern is fatal.
# The resulting settings are written to the debug log.
sub _set_default_message_selection_opts {
    my ($self) = @_;

    my @defaults = (
        [ opt_scanprob  => 1.0 ],
        [ opt_want_date => 1   ],
        [ opt_cache     => 0   ],
    );
    for my $pair (@defaults) {
        my ($key, $value) = @{$pair};
        $self->{$key} = $value  if !defined $self->{$key};
    }

    if (!defined $self->{opt_from_regex}) {
        $self->{opt_from_regex} =
            qr/^From \S+ ?(\S\S\S \S\S\S .?\d .?\d:\d\d:\d\d \d{4}|.?\d-\d\d-\d{4}_\d\d:\d\d:\d\d_)/;
    }
    elsif (ref($self->{opt_from_regex}) ne 'Regexp') {
        my ($rec, $err) = compile_regexp($self->{opt_from_regex}, 1);
        $rec
            or die "fatal: invalid mbox_format_from_regex '$self->{opt_from_regex}': $err\n";
        $self->{opt_from_regex} = $rec;
    }

    dbg("archive-iterator: _set_default_message_selection_opts After: Scanprob[$self->{opt_scanprob}], want_date[$self->{opt_want_date}], cache[$self->{opt_cache}], from_regex[$self->{opt_from_regex}]");
}
# _message_is_useful_by_date($date)
#
# Applies the opt_after / opt_before window to a message date.
#   - neither option set         -> 1
#   - no (false) date            -> 0
#   - only opt_after in effect   -> $date > opt_after
#   - opt_before in effect       -> opt_after < $date < opt_before
# Both bounds are exclusive.
sub _message_is_useful_by_date {
    my ($self, $date) = @_;

    my ($after, $before) = @{$self}{qw(opt_after opt_before)};

    return 1 unless $after || $before;
    return 0 if !$date;
    return $date > $after if !$before;
    return ($date < $before) && ($date > $after);
}
# _message_is_useful_by_file_modtime($mtime)
#
# Cheap pre-filter on a file's modification time.  Returns 1 when the
# time is unknown (false) or opt_after is not set; otherwise returns
# whether the time is strictly later than opt_after.
sub _message_is_useful_by_file_modtime {
    my ($self, $date) = @_;

    my $after = $self->{opt_after};

    return 1 if !$date || !$after;
    return ($date > $after);
}
# _scanprob_says_scan()
#
# Random sampling of messages.  Always returns 1 when opt_scanprob is
# undefined or not below 1.0.  Otherwise draws a random integer in
# [0, 1/opt_scanprob) and returns 1 only when it is zero, else 0.
sub _scanprob_says_scan {
    my ($self) = @_;

    my $prob = $self->{opt_scanprob};
    return 1 unless defined $prob && $prob < 1.0;

    return int( rand( 1 / $prob ) ) == 0 ? 1 : 0;
}
# _index_pack($date, $class, $format, $mail)
#
# Packs one message index entry into a compact string: the date as a
# 32-bit big-endian integer, one character each for class and format,
# and the message location as the remainder.
sub _index_pack {
    my ($date, $class, $format, $mail) = @_;
    return pack('N A A A*', $date, $class, $format, $mail);
}
# _index_unpack($packed)
#
# Reverses _index_pack(): returns ($date, $class, $format, $mail) from a
# packed index entry.
sub _index_unpack {
    my ($packed) = @_;
    return unpack('N A A A*', $packed);
}
# _scan_directory($class, $folder, $bkfunc)
#
# Collects the message files below $folder and feeds each to _scan_file().
# Three layouts are recognised:
#   - Maildir (new/, cur/ and tmp/ all present): entries of new/ and cur/,
#     skipping dot files and names matching ":2,.*T"
#   - a directory containing cyrus.header: every entry without whitespace
#     except the cyrus.index/header/cache/seen files
#   - anything else: every entry not starting with "." or ","
# Sub-directories found among the entries are scanned recursively after
# the files.  Returns early when the folder yields no entries.
#
# Dies when a directory cannot be opened or closed.
#
# Change: directory handles are lexical.  The old code used the global
# bareword handle DIR, which this recursive sub shares by name with the
# other scanners in the package.
sub _scan_directory {
    my ($self, $class, $folder, $bkfunc) = @_;

    my (@files, @subdirs);

    if (-d "$folder/new" && -d "$folder/cur" && -d "$folder/tmp") {
        # Maildir format: bug 3003
        for my $sub ("new", "cur") {
            opendir(my $dh, "$folder/$sub")
                or die "archive-iterator: can't open '$folder/$sub' dir: $!\n";
            push @files, map { "$sub/$_" } grep { !/^\.|:2,.*T/ } readdir($dh);
            closedir($dh)  or die "error closing directory $folder: $!";
        }
    }
    elsif (-f "$folder/cyrus.header") {
        opendir(my $dh, $folder)
            or die "archive-iterator: can't open '$folder' dir: $!\n";
        @files = grep { $_ ne '.' && $_ ne '..' &&
                        /^\S+$/ && !/^cyrus\.(?:index|header|cache|seen)/ }
                 readdir($dh);
        closedir($dh)  or die "error closing directory $folder: $!";
    }
    else {
        opendir(my $dh, $folder)
            or die "archive-iterator: can't open '$folder' dir: $!\n";
        @files = grep { !/^[,.]/ } readdir($dh);
        closedir($dh)  or die "error closing directory $folder: $!";
    }

    # Turn the bare entry names into paths.
    $_ = "$folder/$_"  for @files;

    if (!@files) {
        return;
    }

    $self->_create_cache('dir', $folder);

    foreach my $file (@files) {
        my $stat_errn = stat($file) ? 0 : 0+$!;
        if ($stat_errn == ENOENT) {
            # vanished since readdir; not worth a warning
            dbg("archive-iterator: no access to $file: $!");
        }
        elsif ($stat_errn != 0) {
            warn "archive-iterator: no access to $file: $!\n";
        }
        elsif (-f _ || -c _ || -p _) {
            $self->_scan_file($class, $file, $bkfunc);
        }
        elsif (-d _) {
            push(@subdirs, $file);
        }
        else {
            warn "archive-iterator: $file is not a plain file or directory\n";
        }
    }
    undef @files;   # release the list before recursing

    foreach my $dir (@subdirs) {
        $self->_scan_directory($class, $dir, $bkfunc);
    }

    if (defined $AICache) {
        $AICache = $AICache->finish();
    }
}
# _scan_file($class, $mail, $bkfunc)
#
# Decides whether the single-message file $mail should be processed and,
# if so, reports it through $bkfunc as ($self, $date, $class, 'f', $mail).
#
# Filters, in order:
#   - file modification time vs. opt_after (only when no cache is active)
#   - when receive dates are needed: the date comes from the cache or from
#     the header read out of the file, and is checked against the date
#     window and the scan probability
#   - opt_skip_empty_messages drops messages with an empty header (or, if
#     no date is needed, zero-length files)
#
# Always returns nothing.  Dies on read or close errors.
sub _scan_file {
    my ($self, $class, $mail, $bkfunc) = @_;

    $self->_bump_scan_progress();

    # Without a cache, the file's mtime gives a cheap first rejection.
    if (!defined $AICache) {
        my @s = stat($mail);
        @s  or warn "archive-iterator: no access to $mail: $!";
        return unless $self->_message_is_useful_by_file_modtime($s[9]);
    }

    my $date = AI_TIME_UNKNOWN;

    if (!$self->{determine_receive_date}) {
        return if ($self->{opt_skip_empty_messages} && (-z $mail));
    }
    else {
        # Try the cache first; a false answer means "not cached".
        my $have_date = 0;
        if (defined $AICache) {
            $date = $AICache->check($mail);
            $have_date = 1  if $date;
        }

        if (!$have_date) {
            my $stat_errn = stat($mail) ? 0 : 0+$!;
            if ($stat_errn != 0) {
                warn "archive-iterator: no access to $mail: $!";
                return;
            }
            elsif (!-f _) {
                return;
            }

            my $fh = $self->_mail_open($mail);
            return unless $fh;

            # Collect everything up to the first blank line.
            my $header = '';
            for ($!=0; <$fh>; $!=0) {
                last if /^\015?$/s;
                $header .= $_;
            }
            if (!defined $_ && $! != 0) {
                if ($! == EBADF) {
                    dbg("archive-iterator: error reading: $!");
                }
                else {
                    die "error reading: $!";
                }
            }
            close $fh  or die "error closing input file: $!";

            return if ($self->{opt_skip_empty_messages} && $header eq '');

            $date = Mail::SpamAssassin::Util::receive_date($header);
            if (defined $AICache) {
                $AICache->update($mail, $date);
            }
        }

        return if !$self->_message_is_useful_by_date($date);
        return if !$self->_scanprob_says_scan();
    }

    $bkfunc->($self, $date, $class, 'f', $mail);

    return;
}
# _scan_mailbox($class, $folder, $bkfunc)
#
# Indexes the messages of one mbox file, or of every plain file directly
# inside a directory (names starting with "." or containing whitespace are
# skipped).  For each file the byte offset of every "From " separator line
# that matches opt_from_regex is recorded together with the receive date
# taken from the message header.  Cached index data is used instead when
# opt_cache is on and the cache is non-empty.  Each message that passes
# the date window and scan probability is reported through $bkfunc as
# ($self, $date, $class, 'm', "$file.$offset").
#
# Compressed mbox files and files older than opt_after are skipped.
# Dies on read, tell or close errors.
#
# Changes: the directory is read through a lexical handle into a lexical
# variable.  The old code used the global bareword handle DIR and assigned
# each entry to the global $_, which could overwrite a caller's aliased
# $_.  An unused outer $header that the inner declaration shadowed is gone.
sub _scan_mailbox {
    my ($self, $class, $folder, $bkfunc) = @_;

    my @files;

    my $stat_errn = stat($folder) ? 0 : 0+$!;
    if ($stat_errn == ENOENT) {
        # no such folder: nothing to scan
    }
    elsif ($stat_errn != 0) {
        warn "archive-iterator: no access to $folder: $!";
    }
    elsif (-f _) {
        push(@files, $folder);
    }
    elsif (-d _) {
        # passed a directory of mboxes
        $folder =~ s/\/\s*$//;   # remove trailing slash, if there

        my $dh;
        if (!opendir($dh, $folder)) {
            warn "archive-iterator: can't open '$folder' dir: $!\n";
            return;
        }
        while (defined(my $entry = readdir($dh))) {
            next if $entry eq '.' || $entry eq '..' || $entry !~ /^[^\.]\S*$/;

            $stat_errn = stat("$folder/$entry") ? 0 : 0+$!;
            if ($stat_errn == ENOENT) {
                # vanished since readdir; ignore
            }
            elsif ($stat_errn != 0) {
                warn "archive-iterator: no access to $folder/$entry: $!";
            }
            elsif (-f _) {
                push(@files, "$folder/$entry");
            }
        }
        closedir($dh)  or die "error closing directory $folder: $!";
    }
    else {
        warn "archive-iterator: $folder is not a plain file or directory: $!";
    }

    foreach my $file (@files) {
        $self->_bump_scan_progress();

        if ($file =~ /\.(?:gz|bz2|xz|lz[o4]?)$/i) {
            warn "archive-iterator: compressed mbox folders are not supported at this time\n";
            next;
        }

        my @s = stat($file);
        @s  or warn "archive-iterator: no access to $file: $!";
        next unless $self->_message_is_useful_by_file_modtime($s[9]);

        my $info = {};   # byte offset => receive date
        my $count;

        $self->_create_cache('mbox', $file);

        if ($self->{opt_cache}) {
            if ($count = $AICache->count()) {
                $info = $AICache->check();
            }
        }

        unless ($count) {
            # No usable cache: walk the file and build the index.
            my $fh = $self->_mail_open($file);
            next unless $fh;

            my $start = 0;       # offset of the current message's From line
            my $where = 0;       # file position after the last line read
            my $first = '';      # the From line that opened the message
            my $in_header = 0;

            while (!eof $fh) {
                my $offset = $start;   # byte offset of this message
                my $header = $first;   # remember first line

                for ($!=0; <$fh>; $!=0) {
                    if ($in_header) {
                        if (/^\015?$/s) {
                            $in_header = 0;
                        }
                        else {
                            $header .= $_;
                        }
                    }
                    if (/^From / && $_ =~ $self->{opt_from_regex}) {
                        # Start of the next message: close off this one.
                        $in_header = 1;
                        $first = $_;
                        $start = $where;
                        $where = tell $fh;
                        $where >= 0  or die "cannot obtain file position: $!";
                        last;
                    }
                    $where = tell $fh;
                    $where >= 0  or die "cannot obtain file position: $!";
                }
                defined $_ || $!==0  or
                    $!==EBADF ? dbg("archive-iterator: error reading: $!")
                              : die "error reading: $!";

                if ($header ne '') {
                    $self->_bump_scan_progress();
                    $info->{$offset} = Mail::SpamAssassin::Util::receive_date($header);
                }
            }
            close $fh  or die "error closing input file: $!";
        }

        while (my ($k, $v) = each %{$info}) {
            # Only write back entries that did not come from the cache.
            if (defined $AICache && !$count) {
                $AICache->update($k, $v);
            }

            if ($self->{determine_receive_date}) {
                next if !$self->_message_is_useful_by_date($v);
            }
            next if !$self->_scanprob_says_scan();

            $bkfunc->($self, $v, $class, 'm', "$file.$k");
        }

        if (defined $AICache) {
            $AICache = $AICache->finish();
        }
    }
}
# _scan_mbx($class, $folder, $bkfunc)
#
# Indexes the messages of one mbx file, or of every plain file directly
# inside a directory (names starting with "." or containing whitespace are
# skipped).  The first line of each file must contain "*mbx*".  Scanning
# then starts at byte 2048, where every message is introduced by a line
# matching MBX_SEPARATOR whose second capture gives the message size.
# Cached index data is used instead when opt_cache is on and the cache is
# non-empty.  Each message that passes the date window and scan
# probability is reported through $bkfunc as
# ($self, $date, $class, 'b', "$file.$offset").
#
# Compressed files and files older than opt_after are skipped.  Dies on a
# malformed mailbox and on read, seek, tell or close errors.
#
# Fixes: the compressed-file check now tests each $file.  It used to test
# $folder, so compressed files found inside a directory were not skipped.
# The directory is read through a lexical handle into a lexical variable,
# replacing the global bareword handle DIR and the global $_.
sub _scan_mbx {
    my ($self, $class, $folder, $bkfunc) = @_;

    my (@files, $fp);

    my $stat_errn = stat($folder) ? 0 : 0+$!;
    if ($stat_errn == ENOENT) {
        # no such folder: nothing to scan
    }
    elsif ($stat_errn != 0) {
        warn "archive-iterator: no access to $folder: $!";
    }
    elsif (-f _) {
        push(@files, $folder);
    }
    elsif (-d _) {
        # got passed a directory full of mbx folders
        $folder =~ s/\/\s*$//;   # remove trailing slash, if there

        my $dh;
        if (!opendir($dh, $folder)) {
            warn "archive-iterator: can't open '$folder' dir: $!\n";
            return;
        }
        while (defined(my $entry = readdir($dh))) {
            next if $entry eq '.' || $entry eq '..' || $entry !~ /^[^\.]\S*$/;

            $stat_errn = stat("$folder/$entry") ? 0 : 0+$!;
            if ($stat_errn == ENOENT) {
                # vanished since readdir; ignore
            }
            elsif ($stat_errn != 0) {
                warn "archive-iterator: no access to $folder/$entry: $!";
            }
            elsif (-f _) {
                push(@files, "$folder/$entry");
            }
        }
        closedir($dh)  or die "error closing directory $folder: $!";
    }
    else {
        warn "archive-iterator: $folder is not a plain file or directory: $!";
    }

    foreach my $file (@files) {
        $self->_bump_scan_progress();

        if ($file =~ /\.(?:gz|bz2|xz|lz[o4]?)$/i) {
            warn "archive-iterator: compressed mbx folders are not supported at this time\n";
            next;
        }

        my @s = stat($file);
        @s  or warn "archive-iterator: no access to $file: $!";
        next unless $self->_message_is_useful_by_file_modtime($s[9]);

        my $info = {};   # byte offset => receive date
        my $count;

        $self->_create_cache('mbx', $file);

        if ($self->{opt_cache}) {
            if ($count = $AICache->count()) {
                $info = $AICache->check();
            }
        }

        unless ($count) {
            # No usable cache: walk the file and build the index.
            my $fh = $self->_mail_open($file);
            next unless $fh;

            # Check the mailbox is in mbx format.
            $! = 0;
            $fp = <$fh>;
            defined $fp || $!==0  or
                $!==EBADF ? dbg("archive-iterator: error reading: $!")
                          : die "error reading: $!";
            if (!defined $fp) {
                die "archive-iterator: error: mailbox not in mbx format - empty!\n";
            }
            elsif ($fp !~ /\*mbx\*/) {
                die "archive-iterator: error: mailbox not in mbx format!\n";
            }

            # Skip the mbx file header; messages start at byte 2048.
            seek($fh,2048,0)  or die "cannot reposition file to 2048: $!";

            for ($!=0; <$fh>; $!=0) {
                if ($_ =~ MBX_SEPARATOR) {
                    my $offset = tell $fh;
                    $offset >= 0  or die "cannot obtain file position: $!";
                    my $size = $2;   # size captured by MBX_SEPARATOR

                    # Gather the header up to the first blank line.
                    my $header = '';
                    for ($!=0; <$fh>; $!=0) {
                        last if (/^\015?$/s);
                        $header .= $_;
                    }
                    defined $_ || $!==0  or
                        $!==EBADF ? dbg("archive-iterator: error reading: $!")
                                  : die "error reading: $!";

                    if (!($self->{opt_skip_empty_messages} && $header eq '')) {
                        $self->_bump_scan_progress();
                        $info->{$offset} = Mail::SpamAssassin::Util::receive_date($header);
                    }

                    # Jump over the body to the next separator line.
                    seek($fh, $offset + $size, 0)
                        or die "cannot reposition file to $offset + $size: $!";
                }
                else {
                    die "archive-iterator: error: failure to read message body!\n";
                }
            }
            defined $_ || $!==0  or
                $!==EBADF ? dbg("archive-iterator: error reading: $!")
                          : die "error reading: $!";
            close $fh  or die "error closing input file: $!";
        }

        while (my ($k, $v) = each %{$info}) {
            # Only write back entries that did not come from the cache.
            if (defined $AICache && !$count) {
                $AICache->update($k, $v);
            }

            if ($self->{determine_receive_date}) {
                next if !$self->_message_is_useful_by_date($v);
            }
            next if !$self->_scanprob_says_scan();

            $bkfunc->($self, $v, $class, 'b', "$file.$k");
        }

        if (defined $AICache) {
            $AICache = $AICache->finish();
        }
    }
}
# _bump_scan_progress()
#
# Progress hook for the scanners.  When a scan_progress_sub entry exists,
# a counter is incremented on every call and the callback is invoked on
# the first call and on every 50th call after it.  Does nothing when no
# callback is registered.
sub _bump_scan_progress {
    my ($self) = @_;

    if (exists $self->{scan_progress_sub}) {
        my $tick = $self->{scan_progress_counter}++;
        return if $tick % 50 != 0;
        $self->{scan_progress_sub}->();
    }
}
{
    # Home directory, looked up on first use and kept for later calls.
    my $home;

    # _fix_globs($path)
    #
    # Expands a leading "~/" to the home directory, backslash-escapes any
    # whitespace that is not already escaped, and returns the result of
    # glob() on the path.
    #
    # The home directory is $ENV{HOME} when set; otherwise the passwd
    # entry of the real uid, or on Windows HOMEDRIVE + HOMEPATH (defaults
    # "C:" and "\"); a literal "~" is the last resort.
    sub _fix_globs {
        my ($self, $path) = @_;

        if (!defined $home) {
            $home = $ENV{'HOME'};

            if (!$home) {
                if (Mail::SpamAssassin::Util::am_running_on_windows()) {
                    my $vol = $ENV{'HOMEDRIVE'} || 'C:';
                    my $dir = $ENV{'HOMEPATH'}  || '\\';
                    $home = File::Spec->catpath($vol, $dir, '');
                }
                else {
                    $home = (Mail::SpamAssassin::Util::portable_getpwuid($<))[7];
                }

                # Fall back to a literal "~" if no lookup worked.
                $home = '~'  unless $home;
            }
        }

        $path =~ s{^~/}{$home/};

        # glob() splits on whitespace, so protect it with a backslash.
        $path =~ s/(?<!\\)(\s)/\\$1/g;

        return glob($path);
    }
}
# _create_cache($type, $path)
#
# When opt_cache is enabled, replaces the package-level $AICache with a
# new Mail::SpamAssassin::AICache object built from the given type and
# path, with opt_cachedir passed as 'prefix'.  Does nothing otherwise.
sub _create_cache {
    my ($self, $type, $path) = @_;

    if ($self->{opt_cache}) {
        my %cache_args = (
            'type'   => $type,
            'prefix' => $self->{opt_cachedir},
            'path'   => $path,
        );
        $AICache = Mail::SpamAssassin::AICache->new(\%cache_args);
    }
}
# A module file must end with a true value for require/use to succeed.
1;