Hide Show 27 lines of Pod
our
@ISA
=
qw(Mail::SpamAssassin::Plugin)
;
sub
new {
my
$class
=
shift
;
my
$mailsaobject
=
shift
;
$class
=
ref
(
$class
) ||
$class
;
my
$self
=
$class
->SUPER::new(
$mailsaobject
);
bless
(
$self
,
$class
);
$self
->register_eval_rule(
"check_for_mime"
,
$Mail::SpamAssassin::Conf::TYPE_BODY_EVALS
);
$self
->register_eval_rule(
"check_for_mime_html"
,
$Mail::SpamAssassin::Conf::TYPE_BODY_EVALS
);
$self
->register_eval_rule(
"check_for_mime_html_only"
,
$Mail::SpamAssassin::Conf::TYPE_BODY_EVALS
);
$self
->register_eval_rule(
"check_mime_multipart_ratio"
,
$Mail::SpamAssassin::Conf::TYPE_BODY_EVALS
);
$self
->register_eval_rule(
"check_msg_parse_flags"
,
$Mail::SpamAssassin::Conf::TYPE_HEADER_EVALS
);
$self
->register_eval_rule(
"check_for_ascii_text_illegal"
,
$Mail::SpamAssassin::Conf::TYPE_BODY_EVALS
);
$self
->register_eval_rule(
"check_abundant_unicode_ratio"
,
$Mail::SpamAssassin::Conf::TYPE_BODY_EVALS
);
$self
->register_eval_rule(
"check_for_faraway_charset"
,
$Mail::SpamAssassin::Conf::TYPE_BODY_EVALS
);
$self
->register_eval_rule(
"check_for_uppercase"
,
$Mail::SpamAssassin::Conf::TYPE_BODY_EVALS
);
$self
->register_eval_rule(
"check_ma_non_text"
,
$Mail::SpamAssassin::Conf::TYPE_BODY_EVALS
);
$self
->register_eval_rule(
"check_base64_length"
,
$Mail::SpamAssassin::Conf::TYPE_BODY_EVALS
);
$self
->register_eval_rule(
"check_qp_ratio"
,
$Mail::SpamAssassin::Conf::TYPE_BODY_EVALS
);
return
$self
;
}
sub
are_more_high_bits_set {
my
(
$self
,
$str
) =
@_
;
my
$numhis
=
$str
=~
tr
/\x00-\x7F//c;
my
$numlos
=
length
(
$str
) -
$numhis
;
(
$numlos
<=
$numhis
&&
$numhis
> 3);
}
Hide Show 8 lines of Pod
sub
has_check_for_ascii_text_illegal { 1 }
Hide Show 8 lines of Pod
sub
check_for_ascii_text_illegal {
my
(
$self
,
$pms
) =
@_
;
$self
->_check_attachments(
$pms
)
unless
exists
$pms
->{mime_checked_attachments};
return
0
unless
exists
$pms
->{mime_ascii_text_illegal};
return
(
$pms
->{mime_ascii_text_illegal} > 0);
}
Hide Show 6 lines of Pod
sub
has_check_abundant_unicode_ratio { 1 }
Hide Show 8 lines of Pod
sub
check_abundant_unicode_ratio {
my
(
$self
,
$pms
,
undef
,
$ratio
) =
@_
;
return
0
unless
(
$ratio
=~ /^\d{0,3}\.\d{1,3}$/);
$self
->_check_attachments(
$pms
)
unless
exists
$pms
->{mime_checked_attachments};
return
0
unless
exists
$pms
->{mime_text_unicode_ratio};
return
(
$pms
->{mime_text_unicode_ratio} >=
$ratio
);
}
sub
check_for_faraway_charset {
my
(
$self
,
$pms
,
$body
) =
@_
;
my
$type
=
$pms
->get(
'Content-Type'
,
undef
);
my
@locales
= Mail::SpamAssassin::Util::get_my_locales(
$self
->{main}->{conf}->{ok_locales});
return
0
if
grep
{
$_
eq
"all"
}
@locales
;
$type
= get_charset_from_ct_line(
$type
)
if
defined
$type
;
if
(
defined
$type
&&
!Mail::SpamAssassin::Locales::is_charset_ok_for_locales
(
$type
,
@locales
))
{
$body
=
join
(
"\n"
,
@$body
);
if
(
$self
->are_more_high_bits_set (
$body
)) {
return
1;
}
}
0;
}
sub
check_for_mime {
my
(
$self
,
$pms
,
undef
,
$test
) =
@_
;
$self
->_check_attachments(
$pms
)
unless
exists
$pms
->{mime_checked_attachments};
return
0
unless
exists
$pms
->{
$test
};
return
$pms
->{
$test
} ? 1 : 0;
}
sub
check_for_mime_html {
my
(
$self
,
$pms
) =
@_
;
my
$ctype
=
$pms
->get(
'Content-Type'
);
return
1
if
$ctype
=~ m{^text/html}i;
$self
->_check_attachments(
$pms
)
unless
exists
$pms
->{mime_checked_attachments};
return
0
unless
exists
$pms
->{mime_body_html_count};
return
(
$pms
->{mime_body_html_count} > 0);
}
sub
check_for_mime_html_only {
my
(
$self
,
$pms
) =
@_
;
my
$ctype
=
$pms
->get(
'Content-Type'
);
return
1
if
$ctype
=~ m{^text/html}i;
$self
->_check_attachments(
$pms
)
unless
exists
$pms
->{mime_checked_attachments};
return
0
unless
exists
$pms
->{mime_body_html_count};
return
0
unless
exists
$pms
->{mime_body_text_count};
return
(
$pms
->{mime_body_html_count} > 0 &&
$pms
->{mime_body_text_count} == 0);
}
sub
check_mime_multipart_ratio {
my
(
$self
,
$pms
,
undef
,
$min
,
$max
) =
@_
;
$self
->_check_attachments(
$pms
)
unless
exists
$pms
->{mime_checked_attachments};
return
0
unless
exists
$pms
->{mime_multipart_ratio};
return
(
$pms
->{mime_multipart_ratio} >=
$min
&&
$pms
->{mime_multipart_ratio} <
$max
);
}
sub
_check_mime_header {
my
(
$self
,
$pms
,
$ctype
,
$cte
,
$cd
,
$charset
,
$name
) =
@_
;
$charset
||=
''
;
if
(
$ctype
eq
'text/html'
) {
$pms
->{mime_body_html_count}++;
}
elsif
(
$ctype
=~ m@^text
@i
) {
$pms
->{mime_body_text_count}++;
}
if
(
index
(
$cte
,
'base64'
) >= 0) {
$pms
->{mime_base64_count}++;
}
elsif
(
index
(
$cte
,
'quoted-printable'
) >= 0) {
$pms
->{mime_qp_count}++;
}
if
(
$cd
&&
index
(
$cd
,
'attachment'
) >= 0) {
$pms
->{mime_attachment}++;
}
if
(
$ctype
=~ /^text/ &&
index
(
$cte
,
'base64'
) >= 0 &&
(!
$charset
||
$charset
=~ /(?:us-ascii|ansi_x3\.4-1968|iso-ir-6|ansi_x3\.4-1986|iso_646\.irv:1991|ascii|iso646-us|us|ibm367|cp367|csascii)/) &&
!(
$cd
&&
$cd
=~ /^(?:attachment|inline)/))
{
$pms
->{mime_base64_encoded_text} = 1;
}
if
(
$charset
=~ /iso-\S+-\S+\b/i &&
$charset
!~ /iso-(?:8859-\d{1,2}|2022-(?:jp|kr))\b/)
{
$pms
->{mime_bad_iso_charset} = 1;
}
if
(
$charset
=~ /[a-z]/i) {
if
(
defined
$pms
->{mime_html_charsets}) {
$pms
->{mime_html_charsets} .=
" "
.
$charset
;
}
else
{
$pms
->{mime_html_charsets} =
$charset
;
}
if
(!
$pms
->{mime_faraway_charset}) {
my
@l
= Mail::SpamAssassin::Util::get_my_locales(
$self
->{main}->{conf}->{ok_locales});
if
(!(
grep
{
$_
eq
"all"
}
@l
) &&
!Mail::SpamAssassin::Locales::is_charset_ok_for_locales(
$charset
,
@l
))
{
dbg (
"mimeeval: $charset is not ok for @l"
);
$pms
->{mime_faraway_charset} = 1;
}
}
}
}
sub
_check_attachments {
my
(
$self
,
$pms
) =
@_
;
my
$where
= -1;
my
$qp_bytes
= 0;
my
$qp_count
= 0;
my
@part_bytes
;
my
@part_type
;
my
$normal_chars
= 0;
my
$unicode_chars
= 0;
my
$part
= -1;
$pms
->{mime_checked_attachments} = 1;
$pms
->{mime_base64_count} = 0;
$pms
->{mime_base64_encoded_text} = 0;
$pms
->{mime_body_html_count} = 0;
$pms
->{mime_body_text_count} = 0;
$pms
->{mime_faraway_charset} = 0;
$pms
->{mime_missing_boundary} = 0;
$pms
->{mime_multipart_alternative} = 0;
$pms
->{mime_multipart_ratio} = 1.0;
$pms
->{mime_qp_count} = 0;
$pms
->{mime_qp_long_line} = 0;
$pms
->{mime_qp_ratio} = 0;
$pms
->{mime_ascii_text_illegal} = 0;
$pms
->{mime_text_unicode_ratio} = 0;
foreach
my
$p
(
$pms
->{msg}->find_parts(
qr/./
)) {
my
(
$ctype
,
$boundary
,
$charset
,
$name
) = Mail::SpamAssassin::Util::parse_content_type(
$p
->get_header(
"content-type"
));
if
(
$ctype
eq
'multipart/alternative'
) {
$pms
->{mime_multipart_alternative} = 1;
}
my
$cte
=
$p
->get_header(
'Content-Transfer-Encoding'
) ||
''
;
chomp
(
$cte
=
defined
(
$cte
) ?
lc
$cte
:
""
);
my
$cd
=
$p
->get_header(
'Content-Disposition'
) ||
''
;
chomp
(
$cd
=
defined
(
$cd
) ?
lc
$cd
:
""
);
$charset
=
lc
$charset
if
(
$charset
);
$name
=
lc
$name
if
(
$name
);
$self
->_check_mime_header(
$pms
,
$ctype
,
$cte
,
$cd
,
$charset
,
$name
);
if
(!
$p
->is_leaf()) {
next
;
}
$part
++;
$part_type
[
$part
] =
$ctype
;
$part_bytes
[
$part
] = 0
if
index
(
$cd
,
'attachment'
) == -1;
my
$cte_is_base64
=
index
(
$cte
,
'base64'
) >= 0;
my
$previous
=
''
;
foreach
(@{
$p
->raw()}) {
if
(
$pms
->{mime_multipart_alternative} &&
index
(
$cd
,
'attachment'
) == -1 &&
(
$ctype
eq
'text/plain'
||
$ctype
eq
'text/html'
)) {
$part_bytes
[
$part
] +=
length
;
}
if
(
$where
!= 1 &&
$cte
eq
"quoted-printable"
&&
index
(
$_
,
'SPAM: '
) != 0) {
if
(
length
> 78+1) {
$pms
->{mime_qp_long_line} = 1;
}
$qp_bytes
+=
length
;
if
(
index
(
$_
,
'='
) >= 0) {
my
$qp
= () = m/=(?:09|3[0-9ABCEF]|[2456][0-9A-F]|7[0-9A-E])/g;
if
(
$qp
) {
$qp_count
+=
$qp
;
my
(
$trailing
) = m/((?:=09|=20)+)\s*$/g;
if
(
$trailing
) {
$qp_count
-= (
length
(
$trailing
) / 3);
}
}
}
}
if
(
$ctype
eq
'text/plain'
&& (!
defined
$charset
||
$charset
eq
'us-ascii'
)) {
if
(m/[\x00\x0d\x80-\xff]+/) {
if
(would_log(
'dbg'
,
'eval'
)) {
my
$str
=
$_
;
$str
=~ s/([\x00\x0d\x80-\xff]+)/
'<'
.
unpack
(
'H*'
, $1) .
'>'
/eg;
dbg(
"check: ascii_text_illegal: matches "
.
$str
.
"\n"
);
}
$pms
->{mime_ascii_text_illegal}++;
}
}
if
(
$ctype
eq
'text/plain'
&& (
$cte
eq
''
||
$cte
eq
'7bit'
||
$cte
eq
'8bit'
)) {
my
(
$text
,
$subs
) =
$_
;
$subs
=
$text
=~ s/&
$normal_chars
+=
length
(
$text
);
$unicode_chars
+=
$subs
;
if
(
$subs
&& would_log(
'dbg'
,
'eval'
)) {
my
$str
=
$_
;
$str
=
substr
(
$str
, 0, 512) .
'...'
if
(
length
(
$str
) > 512);
dbg(
"check: abundant_unicode: "
.
$str
.
" ("
.
$subs
.
")\n"
);
}
}
$previous
=
$_
;
}
}
if
(
$qp_bytes
) {
$pms
->{mime_qp_ratio} =
$qp_count
/
$qp_bytes
;
$pms
->{mime_qp_count} =
$qp_count
;
$pms
->{mime_qp_bytes} =
$qp_bytes
;
}
if
(
$normal_chars
) {
$pms
->{mime_text_unicode_ratio} =
$unicode_chars
/
$normal_chars
;
}
if
(
$pms
->{mime_multipart_alternative}) {
my
$text
;
my
$html
;
for
(
my
$i
=
$part
;
$i
>= 0;
$i
--) {
next
if
!
defined
$part_bytes
[
$i
];
if
(!
defined
(
$html
) &&
$part_type
[
$i
] eq
'text/html'
) {
$html
=
$part_bytes
[
$i
];
}
elsif
(!
defined
(
$text
) &&
$part_type
[
$i
] eq
'text/plain'
) {
$text
=
$part_bytes
[
$i
];
}
last
if
(
defined
(
$html
) &&
defined
(
$text
));
}
if
(
defined
(
$text
) &&
defined
(
$html
) &&
$html
> 0) {
$pms
->{mime_multipart_ratio} = (
$text
/
$html
);
}
}
foreach
my
$val
(
values
%{
$pms
->{msg}->{mime_boundary_state}}) {
if
(
$val
!= 0) {
$pms
->{mime_missing_boundary} = 1;
last
;
}
}
}
Hide Show 6 lines of Pod
sub
has_check_qp_ratio { 1 }
Hide Show 9 lines of Pod
sub
check_qp_ratio {
my
(
$self
,
$pms
,
undef
,
$min
) =
@_
;
$self
->_check_attachments(
$pms
)
unless
exists
$pms
->{mime_checked_attachments};
return
0
unless
exists
$pms
->{mime_qp_ratio};
my
$qp_ratio
=
$pms
->{mime_qp_ratio};
dbg(
"eval: qp_ratio - %s - check for min of %s"
,
$qp_ratio
,
$min
);
return
(
defined
$qp_ratio
&&
$qp_ratio
>=
$min
) ? 1 : 0;
}
sub
check_msg_parse_flags {
my
(
$self
,
$pms
,
$type
,
$type2
) =
@_
;
$type
=
$type2
if
ref
(
$type
);
return
defined
$pms
->{msg}->{
$type
};
}
sub
check_for_uppercase {
my
(
$self
,
$pms
,
$body
,
$min
,
$max
) =
@_
;
local
(
$_
);
if
(
exists
$pms
->{uppercase}) {
return
(
$pms
->{uppercase} >
$min
&&
$pms
->{uppercase} <=
$max
);
}
if
(
$self
->body_charset_is_likely_to_fp(
$pms
)) {
$pms
->{uppercase} = 0;
return
0;
}
my
$len
= 0;
my
$lower
= 0;
my
$upper
= 0;
foreach
(@{
$body
}) {
next
unless
/\S\s+\S/;
next
if
/^(?:[A-Za-z0-9+\/=]{60,76} ){2}/;
my
$line
=
$_
;
$line
=~ s/\x1b\
$B
.*\x1b\(B//gs;
$len
+=
length
(
$line
);
$lower
+= (
$line
=~
tr
/a-z0-9//d);
$upper
+= (
$line
=~
tr
/A-Z//);
}
if
(
$len
< 200) {
$pms
->{uppercase} = 0;
return
0;
}
if
((
$upper
+
$lower
) == 0) {
$pms
->{uppercase} = 0;
}
else
{
$pms
->{uppercase} = (
$upper
/ (
$upper
+
$lower
)) * 100;
}
return
(
$pms
->{uppercase} >
$min
&&
$pms
->{uppercase} <=
$max
);
}
sub
body_charset_is_likely_to_fp {
my
(
$self
,
$pms
) =
@_
;
$self
->_check_attachments(
$pms
)
unless
exists
$pms
->{mime_checked_attachments};
my
@charsets
;
my
$type
=
$pms
->get(
'Content-Type'
,
undef
);
$type
= get_charset_from_ct_line(
$type
)
if
defined
$type
;
push
(
@charsets
,
$type
)
if
defined
$type
;
if
(
defined
$pms
->{mime_html_charsets}) {
push
(
@charsets
,
split
(
' '
,
$pms
->{mime_html_charsets}));
}
my
$CHARSETS_LIKELY_TO_FP_AS_CAPS
= CHARSETS_LIKELY_TO_FP_AS_CAPS;
foreach
my
$charset
(
@charsets
) {
if
(
$charset
=~ /^${CHARSETS_LIKELY_TO_FP_AS_CAPS}$/) {
return
1;
}
}
return
0;
}
sub
get_charset_from_ct_line {
my
$type
=
shift
;
if
(!
defined
$type
) {
return
; }
if
(
$type
=~ /charset=
"([^"
]+)"/i) {
return
$1; }
if
(
$type
=~ /charset=
'([^'
]+)'/i) {
return
$1; }
if
(
$type
=~ /charset=(\S+)/i) {
return
$1; }
return
;
}
sub
check_ma_non_text {
my
(
$self
,
$pms
) =
@_
;
foreach
my
$map
(
$pms
->{msg}->find_parts(
qr@^multipart/alternative$@
)) {
foreach
my
$p
(
$map
->find_parts(
qr/./
, 1, 0)) {
next
if
(
lc
$p
->{
'type'
} eq
'multipart/related'
);
next
if
(
lc
$p
->{
'type'
} eq
'application/rtf'
);
next
if
(
$p
->{
'type'
} =~ m@^text/
@i
);
return
1;
}
}
return
0;
}
sub
check_base64_length {
my
$self
=
shift
;
my
$pms
=
shift
;
shift
;
my
$min
=
shift
;
my
$max
=
shift
;
if
(!
defined
$pms
->{base64_length}) {
$pms
->{base64_length} =
$self
->_check_base64_length(
$pms
->{msg});
}
return
0
if
(
defined
$max
&&
$pms
->{base64_length} >
$max
);
return
$pms
->{base64_length} >=
$min
;
}
sub
_check_base64_length {
my
$self
=
shift
;
my
$msg
=
shift
;
my
$result
= 0;
foreach
my
$p
(
$msg
->find_parts(
qr@.@
, 1)) {
my
$ctype
=
Mail::SpamAssassin::Util::parse_content_type(
$p
->get_header(
'content-type'
));
next
if
(
$ctype
eq
'application/ics'
);
my
$cte
=
lc
(
$p
->get_header(
'content-transfer-encoding'
) ||
''
);
next
if
(
$cte
!~ /^base64$/);
foreach
my
$l
( @{
$p
->raw()} ) {
$result
=
length
$l
if
length
$l
>
$result
;
}
}
return
$result
;
}
1;