@ISA
=
qw(Mail::SpamAssassin::Plugin)
;
my
$VERSION
= 4.00;
sub
dbg {
my
$msg
=
shift
;
return
Mail::SpamAssassin::Logger::dbg(
"DecodeShortURLs: $msg"
,
@_
); }
sub
info {
my
$msg
=
shift
;
return
Mail::SpamAssassin::Logger::info(
"DecodeShortURLs: $msg"
,
@_
); }
sub
new {
my
$class
=
shift
;
my
$mailsaobject
=
shift
;
$class
=
ref
(
$class
) ||
$class
;
my
$self
=
$class
->SUPER::new(
$mailsaobject
);
bless
(
$self
,
$class
);
if
(
$mailsaobject
->{local_tests_only}) {
dbg(
"local tests only, disabling HTTP requests"
);
$self
->{net_disabled} = 1;
}
elsif
(!HAS_LWP_USERAGENT) {
dbg(
"module LWP::UserAgent not installed, disabling HTTP requests"
);
$self
->{net_disabled} = 1;
}
$self
->set_config(
$mailsaobject
->{conf});
$self
->register_method_priority (
'check_dnsbl'
, -10);
$self
->register_eval_rule(
'short_url'
,
$Mail::SpamAssassin::Conf::TYPE_BODY_EVALS
);
$self
->register_eval_rule(
'short_url_redir'
,
$Mail::SpamAssassin::Conf::TYPE_BODY_EVALS
);
$self
->register_eval_rule(
'short_url_200'
,
$Mail::SpamAssassin::Conf::TYPE_BODY_EVALS
);
$self
->register_eval_rule(
'short_url_404'
,
$Mail::SpamAssassin::Conf::TYPE_BODY_EVALS
);
$self
->register_eval_rule(
'short_url_code'
,
$Mail::SpamAssassin::Conf::TYPE_BODY_EVALS
);
$self
->register_eval_rule(
'short_url_chained'
,
$Mail::SpamAssassin::Conf::TYPE_BODY_EVALS
);
$self
->register_eval_rule(
'short_url_maxchain'
,
$Mail::SpamAssassin::Conf::TYPE_BODY_EVALS
);
$self
->register_eval_rule(
'short_url_loop'
,
$Mail::SpamAssassin::Conf::TYPE_BODY_EVALS
);
$self
->register_eval_rule(
'short_url_tests'
);
return
$self
;
}
sub
set_config {
my
(
$self
,
$conf
) =
@_
;
my
@cmds
= ();
push
(
@cmds
, {
setting
=>
'url_shortener'
,
default
=> {},
type
=>
$Mail::SpamAssassin::Conf::CONF_TYPE_HASH_KEY_VALUE
,
code
=>
sub
{
my
(
$self
,
$key
,
$value
,
$line
) =
@_
;
if
(
$value
eq
''
) {
return
$Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE
;
}
foreach
my
$domain
(
split
(/\s+/,
$value
)) {
$self
->{url_shortener}->{
lc
$domain
} = 1;
}
}
});
push
(
@cmds
, {
setting
=>
'url_shortener_get'
,
code
=>
sub
{
my
(
$self
,
$key
,
$value
,
$line
) =
@_
;
if
(
$value
eq
''
) {
return
$Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE
;
}
foreach
my
$domain
(
split
(/\s+/,
$value
)) {
$self
->{url_shortener}->{
lc
$domain
} = 2;
}
}
});
push
(
@cmds
, {
setting
=>
'clear_url_shortener'
,
code
=>
sub
{
my
(
$self
,
$key
,
$value
,
$line
) =
@_
;
if
(
$value
eq
''
) {
$self
->{url_shortener} = {};
}
else
{
foreach
my
$domain
(
split
(/\s+/,
$value
)) {
delete
$self
->{url_shortener}->{
lc
$domain
};
}
}
}
});
push
(
@cmds
, {
setting
=>
'url_shortener_cache_type'
,
default
=>
''
,
is_priv
=> 1,
type
=>
$Mail::SpamAssassin::Conf::CONF_TYPE_STRING
});
push
(
@cmds
, {
setting
=>
'url_shortener_cache_dsn'
,
default
=>
''
,
is_priv
=> 1,
type
=>
$Mail::SpamAssassin::Conf::CONF_TYPE_STRING
});
push
(
@cmds
, {
setting
=>
'url_shortener_cache_username'
,
default
=>
''
,
is_priv
=> 1,
type
=>
$Mail::SpamAssassin::Conf::CONF_TYPE_STRING
});
push
(
@cmds
, {
setting
=>
'url_shortener_cache_password'
,
default
=>
''
,
is_priv
=> 1,
type
=>
$Mail::SpamAssassin::Conf::CONF_TYPE_STRING
});
push
(
@cmds
, {
setting
=>
'url_shortener_cache_ttl'
,
is_admin
=> 1,
default
=> 86400,
type
=>
$Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC
});
push
(
@cmds
, {
setting
=>
'url_shortener_cache_autoclean'
,
is_admin
=> 1,
default
=> 1000,
type
=>
$Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC
});
push
(
@cmds
, {
setting
=>
'url_shortener_loginfo'
,
is_admin
=> 1,
default
=> 0,
type
=>
$Mail::SpamAssassin::Conf::CONF_TYPE_BOOL
});
push
(
@cmds
, {
setting
=>
'url_shortener_timeout'
,
is_admin
=> 1,
default
=> 5,
type
=>
$Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC
});
push
(
@cmds
, {
setting
=>
'max_short_urls'
,
is_admin
=> 1,
default
=> 10,
type
=>
$Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC
});
push
(
@cmds
, {
setting
=>
'max_short_url_redirections'
,
is_admin
=> 1,
default
=> 10,
type
=>
$Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC
});
push
(
@cmds
, {
setting
=>
'url_shortener_user_agent'
,
is_admin
=> 1,
default
=>
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.67 Safari/537.36'
,
type
=>
$Mail::SpamAssassin::Conf::CONF_TYPE_STRING
});
$conf
->{parser}->register_commands(\
@cmds
);
}
sub
short_url_tests {
return
0;
}
sub
finish_parsing_start {
my
(
$self
,
$opts
) =
@_
;
if
(
$opts
->{conf}->{eval_to_rule}->{short_url_tests}) {
warn
"DecodeShortURLs: Legacy configuration format detected. "
.
"Eval function short_url_tests() is no longer supported, "
.
"please see documentation for the new rule format.\n"
;
}
}
sub
initialise_url_shortener_cache {
my
(
$self
,
$conf
) =
@_
;
return
if
$self
->{dbh};
return
if
!
$conf
->{url_shortener_cache_type};
if
(!
$conf
->{url_shortener_cache_dsn}) {
warn
"DecodeShortURLs: invalid cache configuration\n"
;
return
;
}
if
(
$conf
->{url_shortener_cache_type} =~ /^(?:dbi|sqlite)$/i
&&
$conf
->{url_shortener_cache_dsn} =~ /^dbi:SQLite/)
{
eval
{
local
$SIG
{
'__DIE__'
};
DBD::SQLite->VERSION(1.59_01);
$self
->{dbh} = DBI->connect_cached(
$conf
->{url_shortener_cache_dsn},
''
,
''
,
{
RaiseError
=> 1,
PrintError
=> 0,
InactiveDestroy
=> 1,
AutoCommit
=> 1}
);
$self
->{dbh}->
do
("
CREATE TABLE IF NOT EXISTS short_url_cache (
short_url TEXT PRIMARY KEY NOT NULL,
decoded_url TEXT NOT NULL,
hits INTEGER NOT NULL DEFAULT 1,
created INTEGER NOT NULL,
modified INTEGER NOT NULL
)
");
$self
->{sth_insert} =
$self
->{dbh}->prepare("
INSERT INTO short_url_cache (short_url, decoded_url, created, modified)
VALUES (?,?,strftime(
'%s'
,
'now'
),strftime(
'%s'
,
'now'
))
ON CONFLICT(short_url) DO UPDATE
SET decoded_url = excluded.decoded_url,
modified = excluded.modified,
hits = hits + 1
");
$self
->{sth_select} =
$self
->{dbh}->prepare("
SELECT decoded_url FROM short_url_cache
WHERE short_url = ?
");
$self
->{sth_delete} =
$self
->{dbh}->prepare("
DELETE FROM short_url_cache
WHERE short_url = ? AND created < strftime(
'%s'
,
'now'
) -
$conf
->{url_shortener_cache_ttl}
");
$self
->{sth_clean} =
$self
->{dbh}->prepare("
DELETE FROM short_url_cache
WHERE created < strftime(
'%s'
,
'now'
) -
$conf
->{url_shortener_cache_ttl}
");
};
}
elsif
(
lc
$conf
->{url_shortener_cache_type} eq
'dbi'
&&
$conf
->{url_shortener_cache_dsn} =~ /^dbi:(?:mysql|MariaDB)/i)
{
eval
{
local
$SIG
{
'__DIE__'
};
$self
->{dbh} = DBI->connect_cached(
$conf
->{url_shortener_cache_dsn},
$conf
->{url_shortener_cache_username},
$conf
->{url_shortener_cache_password},
{
RaiseError
=> 1,
PrintError
=> 0,
InactiveDestroy
=> 1,
AutoCommit
=> 1}
);
$self
->{sth_insert} =
$self
->{dbh}->prepare("
INSERT INTO short_url_cache (short_url, decoded_url, created, modified)
VALUES (?,?,UNIX_TIMESTAMP(),UNIX_TIMESTAMP())
ON DUPLICATE KEY UPDATE
decoded_url = VALUES(decoded_url),
modified = VALUES(modified),
hits = hits + 1
");
$self
->{sth_select} =
$self
->{dbh}->prepare("
SELECT decoded_url FROM short_url_cache
WHERE short_url = ?
");
$self
->{sth_delete} =
$self
->{dbh}->prepare("
DELETE FROM short_url_cache
WHERE short_url = ? AND created < UNIX_TIMESTAMP() -
$conf
->{url_shortener_cache_ttl}
");
$self
->{sth_clean} =
$self
->{dbh}->prepare("
DELETE FROM short_url_cache
WHERE created < UNIX_TIMESTAMP() -
$conf
->{url_shortener_cache_ttl}
");
};
}
elsif
(
lc
$conf
->{url_shortener_cache_type} eq
'dbi'
&&
$conf
->{url_shortener_cache_dsn} =~ /^dbi:Pg/i)
{
eval
{
local
$SIG
{
'__DIE__'
};
$self
->{dbh} = DBI->connect_cached(
$conf
->{url_shortener_cache_dsn},
$conf
->{url_shortener_cache_username},
$conf
->{url_shortener_cache_password},
{
RaiseError
=> 1,
PrintError
=> 0,
InactiveDestroy
=> 1,
AutoCommit
=> 1}
);
$self
->{sth_insert} =
$self
->{dbh}->prepare("
INSERT INTO short_url_cache (short_url, decoded_url, created, modified)
VALUES (?,?,CAST(EXTRACT(epoch FROM NOW()) AS INT),CAST(EXTRACT(epoch FROM NOW()) AS INT))
ON CONFLICT (short_url) DO UPDATE SET
decoded_url = EXCLUDED.decoded_url,
modified = EXCLUDED.modified,
hits = short_url_cache.hits + 1
");
$self
->{sth_select} =
$self
->{dbh}->prepare("
SELECT decoded_url FROM short_url_cache
WHERE short_url = ?
");
$self
->{sth_delete} =
$self
->{dbh}->prepare("
DELETE FROM short_url_cache
WHERE short_url ? = AND created < CAST(EXTRACT(epoch FROM NOW()) AS INT) -
$conf
->{url_shortener_cache_ttl}
");
$self
->{sth_clean} =
$self
->{dbh}->prepare("
DELETE FROM short_url_cache
WHERE created < CAST(EXTRACT(epoch FROM NOW()) AS INT) -
$conf
->{url_shortener_cache_ttl}
");
};
}
else
{
warn
"DecodeShortURLs: invalid cache configuration\n"
;
return
;
}
if
($@ || !
$self
->{sth_clean}) {
warn
"DecodeShortURLs: cache connect failed: $@\n"
;
undef
$self
->{dbh};
undef
$self
->{sth_insert};
undef
$self
->{sth_select};
undef
$self
->{sth_delete};
undef
$self
->{sth_clean};
}
}
sub
short_url {
my
(
$self
,
$pms
) =
@_
;
$self
->_check_short(
$pms
);
return
$pms
->{short_url} ? 1 : 0;
}
sub
short_url_redir {
my
(
$self
,
$pms
) =
@_
;
$self
->_check_short(
$pms
);
return
$pms
->{short_url_redir} ? 1 : 0;
}
sub
short_url_200 {
my
(
$self
,
$pms
) =
@_
;
$self
->_check_short(
$pms
);
return
$pms
->{short_url_200} ? 1 : 0;
}
sub
short_url_404 {
my
(
$self
,
$pms
) =
@_
;
$self
->_check_short(
$pms
);
return
$pms
->{short_url_404} ? 1 : 0;
}
sub
short_url_code {
my
(
$self
,
$pms
,
undef
,
$code
) =
@_
;
$self
->_check_short(
$pms
);
return
0
unless
defined
$code
&&
$code
=~ /^\d{3}$/;
return
$pms
->{
"short_url_$code"
} ? 1 : 0;
}
sub
short_url_chained {
my
(
$self
,
$pms
) =
@_
;
$self
->_check_short(
$pms
);
return
$pms
->{short_url_chained} ? 1 : 0;
}
sub
short_url_maxchain {
my
(
$self
,
$pms
) =
@_
;
$self
->_check_short(
$pms
);
return
$pms
->{short_url_maxchain} ? 1 : 0;
}
sub
short_url_loop {
my
(
$self
,
$pms
) =
@_
;
$self
->_check_short(
$pms
);
return
$pms
->{short_url_loop} ? 1 : 0;
}
sub
_check_shortener_uri {
my
(
$uri
,
$conf
) =
@_
;
local
($1,$2);
return
0
unless
$uri
=~ m{^
https?://
(?:[^\@/?
([^/?
(?::\d+)?
(.*?\w)?
}ix;
my
$host
=
lc
$1;
my
$has_path
=
defined
$2;
my
$levels
=
$host
=~
tr
/.//;
return
if
$levels
== 1 && !
$has_path
;
if
(
exists
$conf
->{url_shortener}->{
$host
}) {
return
{
'uri'
=>
$uri
,
'method'
=>
$conf
->{url_shortener}->{
$host
} == 1 ?
'head'
:
'get'
,
};
}
elsif
(
$levels
== 2 &&
$host
=~ /^(?!www)[^.]+(\.[^.]+\.[^.]+)$/i &&
exists
$conf
->{url_shortener}->{$1}) {
return
{
'uri'
=>
$uri
,
'method'
=>
$conf
->{url_shortener}->{$1} == 1 ?
'head'
:
'get'
,
};
}
return
;
}
sub
check_dnsbl {
my
(
$self
,
$opts
) =
@_
;
$self
->_check_short(
$opts
->{permsgstatus});
}
sub
_check_short {
my
(
$self
,
$pms
) =
@_
;
return
if
$pms
->{short_url_checked}++;
my
$conf
=
$pms
->{conf};
my
%short_urls
;
my
$uris
=
$pms
->get_uri_detail_list();
while
(
my
(
$uri
,
$info
) =
each
%{
$uris
}) {
next
unless
$info
->{domains} &&
$info
->{cleaned};
if
(
my
$short_url_info
= _check_shortener_uri(
$uri
,
$conf
)) {
$short_urls
{
$uri
} =
$short_url_info
;
last
if
scalar
keys
%short_urls
>=
$conf
->{max_short_urls};
}
}
return
unless
%short_urls
;
$pms
->{short_url} = 1;
return
if
$self
->{net_disabled};
return
if
!
$conf
->{max_short_urls};
$self
->initialise_url_shortener_cache(
$conf
);
my
$ua
= LWP::UserAgent->new(
'agent'
=>
$conf
->{url_shortener_user_agent},
'max_redirect'
=> 0,
'timeout'
=>
$conf
->{url_shortener_timeout},
);
$ua
->env_proxy;
foreach
my
$uri
(
keys
%short_urls
) {
$self
->recursive_lookup(
$short_urls
{
$uri
},
$pms
,
$ua
);
}
if
(
$self
->{dbh} &&
$conf
->{url_shortener_cache_autoclean}
&&
rand
() < 1/
$conf
->{url_shortener_cache_autoclean})
{
dbg(
"cleaning stale cache entries"
);
eval
{
$self
->{sth_clean}->execute(); };
if
($@) { dbg(
"cache cleaning failed: $@"
); }
}
}
sub
recursive_lookup {
my
(
$self
,
$short_url_info
,
$pms
,
$ua
,
%been_here
) =
@_
;
my
$conf
=
$pms
->{conf};
my
$count
=
scalar
keys
%been_here
;
dbg(
"redirection count $count"
)
if
$count
;
if
(
$count
>=
$conf
->{max_short_url_redirections}) {
dbg(
"found more than $conf->{max_short_url_redirections} shortener redirections"
);
$pms
->{short_url_maxchain} = 1;
return
;
}
my
$short_url
=
$short_url_info
->{uri};
my
$location
;
if
(
defined
(
$location
=
$self
->cache_get(
$short_url
))) {
if
(
$conf
->{url_shortener_loginfo}) {
info(
"found cached $short_url => $location"
);
}
else
{
dbg(
"found cached $short_url => $location"
);
}
if
(
$location
=~ /^\d{3}$/) {
$pms
->{
"short_url_$location"
} = 1;
$self
->cache_add(
$short_url
,
$location
);
return
;
}
}
else
{
my
$method
=
$short_url_info
->{method};
my
$response
=
$ua
->
$method
(
$short_url
);
if
(!
$response
->is_redirect) {
dbg(
"URL is not redirect: $short_url = "
.
$response
->status_line);
my
$rcode
=
$response
->code;
if
(
$rcode
=~ /^\d{3}$/) {
$pms
->{
"short_url_$rcode"
} = 1;
$self
->cache_add(
$short_url
,
$rcode
);
}
return
;
}
$location
=
$response
->headers->{location};
if
(
$self
->{url_shortener_loginfo}) {
info(
"found $short_url => $location"
);
}
else
{
dbg(
"found $short_url => $location"
);
}
}
$self
->cache_add(
$short_url
,
$location
);
if
(
$short_url
eq
$location
) {
dbg(
"URL is redirect to itself"
);
return
;
}
$pms
->{short_url_redir} = 1;
$pms
->{short_url_chained} = 1
if
$count
;
if
(
$location
!~ m{^[a-z]+://}i) {
my
$orig_location
=
$location
;
my
$orig_short_url
=
$short_url
;
if
(
index
(
$location
,
'/'
) == 0) {
$short_url
=~ s{^([a-z]+://.*?)[/?
}
else
{
$short_url
=~ s{^([a-z]+://.*)/}{$1};
}
$location
=
"$short_url/$location"
;
dbg(
"looks like a local redirection: $orig_short_url => $location ($orig_location)"
);
$pms
->add_uri_detail_list(
$location
)
if
!
$pms
->{uri_detail_list}->{
$location
};
return
;
}
if
(
exists
$been_here
{
$location
}) {
dbg(
"error: loop detected: $location"
);
$pms
->{short_url_loop} = 1;
return
;
}
$been_here
{
$location
} = 1;
$pms
->add_uri_detail_list(
$location
)
if
!
$pms
->{uri_detail_list}->{
$location
};
if
(
my
$short_url_info
= _check_shortener_uri(
$location
,
$conf
)) {
$self
->recursive_lookup(
$short_url_info
,
$pms
,
$ua
,
%been_here
);
}
}
sub
cache_add {
my
(
$self
,
$short_url
,
$decoded_url
) =
@_
;
return
if
!
$self
->{dbh};
return
if
length
(
$short_url
) > 256 ||
length
(
$decoded_url
) > 512;
eval
{
$self
->{sth_insert}->execute(
$short_url
,
$decoded_url
); };
if
($@) {
dbg(
"could not add to cache: $@"
);
}
return
;
}
sub
cache_get {
my
(
$self
,
$key
) =
@_
;
return
if
!
$self
->{dbh};
eval
{
$self
->{sth_delete}->execute(
$key
); };
if
($@) {
dbg(
"cache delete failed: $@"
);
return
;
}
eval
{
$self
->{sth_select}->execute(
$key
); };
if
($@) {
dbg(
"cache get failed: $@"
);
return
;
}
my
@row
=
$self
->{sth_select}->fetchrow_array();
if
(
@row
) {
return
$row
[0];
}
return
;
}
sub
has_short_url { 1 }
sub
has_autoclean { 1 }
sub
has_short_url_code { 1 }
sub
has_user_agent { 1 }
sub
has_get { 1 }
sub
has_clear { 1 }
sub
has_timeout { 1 }
sub
has_max_redirections { 1 }
sub
has_short_url_redir { 1 }
1;