#!/usr/bin/perl -w
# usage()
#
# Abort the program with the command-line help text.  The text is raised
# through die(), so an uncaught call prints it on STDERR and terminates the
# script with a failure status.  Takes no arguments and never returns.
sub usage {
    die "
parse-rules-for-masses: parse the SpamAssassin rules files for mass-checks,
  evolving, and frequency analysis

usage: ./parse-rules-for-masses [-d rulesdir] [-o outputfile] [-s scoreset] [-x]

  rulesdir defaults to ../rules
  outputfile defaults to ./tmp/rules.pl
  scoreset defaults to 0
  -x do not include test rules files (ie 70_*)

";
}
# ---------------------------------------------------------------------------
# Main program: parse the command line, read the rules, derive the flat
# name => score table, and dump both structures to the output file.

# GetOptions() comes from Getopt::Long; load it here so the call below
# resolves to a real subroutine.
use Getopt::Long;

# Package globals because readrules() consults $scoreset and
# $skip_test_rules directly.
our (@rulesdirs, $outputfile, $scoreset, $skip_test_rules);

# -d may be repeated; every value is appended to @rulesdirs.
# An unparsable command line shows the help text instead of being ignored.
GetOptions(
    "d=s"      => \@rulesdirs,
    "o=s"      => \$outputfile,
    "s=i"      => \$scoreset,
    "x"        => \$skip_test_rules,
    "help|h|?" => sub { usage(); },
) or usage();

# No -d given: fall back to the default rules directory.
@rulesdirs = ("../rules") unless @rulesdirs;

if (!defined $outputfile) {
    $outputfile = "./tmp/rules.pl";
    # Best effort: the directory may already exist, and writerules() reports
    # the real failure if the output file cannot be created.
    mkdir("tmp", 0755);
}

$scoreset = 0 if (!defined $scoreset);

# $rules maps rule name => hashref of attributes and is filled in by
# readrules(); the pseudo-entry '_scoreset' records the score set in use.
my $rules = { };
$rules->{_scoreset} = $scoreset;
readrules(@rulesdirs);

# $scores is the flat rule name => score view of $rules (minus '_scoreset').
my $scores = { };
foreach my $key (keys %{$rules}) {
    next if $key eq '_scoreset';
    $scores->{$key} = $rules->{$key}->{score};
}

writerules($outputfile);
exit;
# readrules(@dirs)
#
# Parse every "*.cf" file found directly inside each directory in @dirs
# (files are processed in sorted order within a directory) and record what
# is learned about each rule in the file-level $rules hashref, keyed by rule
# name.  Per-rule keys written here:
#
#   type, lang, issubrule, tflags, eval, code   - from the rule definition
#   depends                                     - words used by a "meta" rule
#   describe                                    - from "describe" lines
#   score, mutable                              - from "score" lines
#   no_score_found                              - set when a default score
#                                                 had to be assigned
#
# Reads the globals $skip_test_rules (skip files whose path matches /70_/)
# and $scoreset (which column of a multi-value score line to use).
#
# After all files are read, entries that never received a rule definition
# are deleted, missing scores get defaults, and mutability is finalised.
# Files that cannot be opened are reported with warn() and skipped.
sub readrules {
    foreach my $indir (@_) {
        my @files = <$indir/*.cf>;

        foreach my $file (sort @files) {
            # Scores are considered mutable at the start of every file; the
            # gen:mutable markers below toggle this while reading.
            my $scores_mutable = 1;

            if ($skip_test_rules) {
                next if ($file =~ /70_/);
            }

            my $in;
            if (!open($in, '<', $file)) {
                warn "cannot read $file: $!\n";
                next;
            }

            while (<$in>) {
                if (/<\/gen:mutable>/i) {
                    $scores_mutable = 0;
                }
                elsif (/<gen:mutable>/i) {
                    $scores_mutable = 1;
                }

                # Normalise the line: drop '#' comments, trim surrounding
                # whitespace (including the newline) and skip what is left
                # if it is empty.  This has to come after the gen:mutable
                # test above, presumably because those markers are written
                # inside comments.
                s/#.*$//;
                s/^\s+//;
                s/\s+$//;
                next if /^$/;

                # Optional "lang xx" prefix; stripped so the remaining text
                # can be matched like an ordinary line.
                my $lang = '';
                if (s/^lang\s+(\S+)\s+//) {
                    $lang = $1;
                }

                if (/^(header|rawbody|body|full|uri|askdns|meta|mimeheader|reuse)\s+(\S+)\s+(.*)$/) {
                    my ($type, $name, $val) = ($1, $2, $3);

                    # A "reuse" line never replaces a rule that already has
                    # a definition.
                    if ($type eq 'reuse' && exists $rules->{$name}->{type}) {
                        next;
                    }

                    my $rule = ($rules->{$name} ||= { });
                    $rule->{type}      = $type;
                    $rule->{lang}      = $lang;
                    $rule->{issubrule} = ($name =~ /^__/) ? '1' : '0';
                    $rule->{tflags}    = '';
                    $rule->{eval}      = ($val =~ /\beval:(\w+)/) ? $1 : '0';

                    if ($type eq "meta") {
                        # Every word in the expression that is not a plain
                        # number is recorded as a dependency.
                        my @depends = grep { !/^\d+$/ } ($val =~ m/(\w+)/g);
                        push(@{ $rule->{depends} }, @depends);
                    }

                    $rule->{code} = $val;
                }
                elsif (/^describe\s+(\S+)\s+(.+)$/) {
                    my ($name, $text) = ($1, $2);
                    $rules->{$name} ||= { };
                    if ($lang) {
                        # A language-specific description only fills a gap.
                        $rules->{$name}->{describe} ||= $text;
                    }
                    else {
                        $rules->{$name}->{describe} = $text;
                    }
                }
                elsif (/^tflags\s+(\S+)\s+(.+)$/) {
                    my ($name, $flags) = ($1, $2);
                    $rules->{$name} ||= { };
                    $rules->{$name}->{tflags} = $flags;
                }
                elsif (/^score\s+(\S+)\s+(.+)$/) {
                    my ($name, $score) = ($1, $2);
                    $rules->{$name} ||= { };

                    # Several whitespace-separated values: pick the column
                    # selected by $scoreset.
                    if ($score =~ /\s/) {
                        ($score) = (split(/\s+/, $score))[$scoreset];
                    }

                    $rules->{$name}->{score}   = $score;
                    $rules->{$name}->{mutable} = $scores_mutable;
                }
            }
            close($in);
        }
    }

    # Post-processing over everything that was collected.  keys() returns a
    # snapshot list, so deleting entries inside the loop is safe.
    foreach my $rule (keys %{$rules}) {
        next if ($rule eq '_scoreset');

        my $info = $rules->{$rule};

        # Names seen only in describe/tflags/score lines are not rules.
        if (!defined $info->{type}) {
            delete $rules->{$rule};
            next;
        }

        my $tflags = defined $info->{tflags} ? $info->{tflags} : '';

        if (!defined $info->{score}) {
            # Default score: 0.01 for T_ rules, 1.0 otherwise; negated for
            # rules flagged "nice".
            my $def = ($rule =~ /^T_/) ? 0.01 : 1.0;
            $info->{score} = ($tflags =~ /\bnice\b/) ? -$def : $def;
            $info->{no_score_found} = 1;
        }

        # "net" rules are zeroed and frozen unless bit 0 of $scoreset is set.
        if ($tflags =~ /\bnet\b/ && ($scoreset & 1) == 0) {
            $info->{mutable} = 0;
            $info->{score}   = 0;
        }

        # "learn" rules are zeroed and frozen unless bit 1 of $scoreset is set.
        if ($tflags =~ /\blearn\b/ && ($scoreset & 2) == 0) {
            $info->{mutable} = 0;
            $info->{score}   = 0;
        }

        if (!defined $info->{mutable}) {
            $info->{mutable} = 1;
        }

        # T_ rules are never mutable; AWL is neither mutable nor scored.
        if ($rule =~ /^T_/) {
            $info->{mutable} = 0;
        }
        elsif ($rule eq 'AWL') {
            $info->{mutable} = 0;
            $info->{score}   = 0;
        }
    }
}
# writerules($outfile)
#
# Serialise the file-level $rules and $scores hashrefs to $outfile as Perl
# source, using Data::Dumper with the names '*rules' and '*scores' and with
# Purity enabled.  The file starts with a "# dumped at <time>" comment and
# ends with "1;".
#
# Missing parent directories of $outfile are created first (best effort).
# Dies if the file cannot be opened or if flushing it on close fails.
sub writerules {
    my $outfile = shift;

    # Loaded here so the sub works regardless of what the rest of the file
    # has pulled in; all three are core modules.
    require File::Basename;
    require File::Path;
    require Data::Dumper;

    # Create the parent directory without going through a shell, so unusual
    # characters in the path cannot be interpreted as shell syntax.  mkpath
    # failures are swallowed on purpose: the open() below reports the problem
    # with a proper message.
    my $outdir = File::Basename::dirname($outfile);
    if (!-d $outdir) {
        eval { File::Path::mkpath($outdir); };
    }

    open(my $out, '>', $outfile)
        or die "cannot write to $outfile: $!";

    # Timestamp comment followed by a blank line.
    print {$out} "# dumped at " . scalar(localtime) . "\n\n";

    # Purity makes Data::Dumper emit extra statements where needed so the
    # dump can be evaluated back into an equivalent structure.
    local $Data::Dumper::Purity = 1;
    print {$out} Data::Dumper->Dump([$rules, $scores], ['*rules', '*scores']);

    print {$out} "1;";

    # Buffered write errors only surface at close time.
    close($out)
        or die "cannot write to $outfile: $!";
}