The Perl and Raku Conference 2025: Greenville, South Carolina - June 27-29 Learn more

# <@LICENSE>
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to you under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# </@LICENSE>
#
# -------------------------------------------------------
# ImageInfo Plugin for SpamAssassin
# Version: 0.7
# Created: 2006-08-02
# Modified: 2007-01-17
#
# Changes:
# 0.7 - added image_name_regex to allow pattern matching on the image name
# - added support for image/pjpeg content types (progressive jpeg)
# - updated imageinfo.cf with a few sample rules for using image_name_regex()
# 0.6 - fixed dems_ bug in image_size_range_
# 0.5 - added image_named and image_to_text_ratio
# 0.4 - added image_size_exact and image_size_range
# 0.3 - added jpeg support
# 0.2 - optimized by theo
# 0.1 - added gif/png support
#
#
# Usage:
# image_count()
#
# body RULENAME eval:image_count(<type>,<min>,[max])
# type: 'all','gif','png', or 'jpeg'
# min: required, message contains at least this
# many images
# max: optional, if specified, message must not
# contain more than this number of images
#
# image_count() examples
#
# body ONE_IMAGE eval:image_count('all',1,1)
# body ONE_OR_MORE_IMAGES eval:image_count('all',1)
# body ONE_PNG eval:image_count('png',1,1)
# body TWO_GIFS eval:image_count('gif',2,2)
# body MANY_JPEGS eval:image_count('jpeg',5)
#
# pixel_coverage()
#
# body RULENAME eval:pixel_coverage(<type>,<min>,[max])
# type: 'all','gif','png', or 'jpeg'
# min: required, message contains at least this
# much pixel area
# max: optional, if specified, message must not
# contain more than this much pixel area
#
# pixel_coverage() examples
#
# body LARGE_IMAGE_AREA eval:pixel_coverage('all',150000) # catches any images that are 150k pixels sq. or larger
# body SMALL_GIF_AREA eval:pixel_coverage('gif',1,40000) # catches only gifs that are 1 to 40k pixels sq.
#
# image_name_regex()
#
# body RULENAME eval:image_name_regex(<regex>)
# regex: full quoted regexp, see examples below
#
# image_name_regex() examples
#
# body CG_DOUBLEDOT_GIF eval:image_name_regex('/^\w{2,9}\.\.gif$/i') # catches double dot gifs abcd..gif
#
#
#
# -------------------------------------------------------
use strict;
# use bytes;
use re 'taint';
our @ISA = qw(Mail::SpamAssassin::Plugin);
# Constructor: builds the plugin object through the parent class and
# registers every eval rule provided by this plugin as a body eval.
#
#   $class        - class name (or an existing instance)
#   $mailsaobject - passed straight through to the parent constructor
#
# Returns the blessed plugin object.
sub new {
  my ($class, $mailsaobject) = @_;

  # allow invocation on an instance as well as on a class name
  $class = ref($class) || $class;
  my $self = $class->SUPER::new($mailsaobject);
  bless ($self, $class);

  # one entry per eval sub defined in this file
  my @eval_rules = qw(
    image_count
    pixel_coverage
    image_size_exact
    image_size_range
    image_named
    image_name_regex
    image_to_text_ratio
  );
  foreach my $rule (@eval_rules) {
    $self->register_eval_rule ($rule, $Mail::SpamAssassin::Conf::TYPE_BODY_EVALS);
  }

  return $self;
}
# -----------------------------------------
# Shared bookkeeping for the parsers in %get_details: records one parsed
# image in $pms->{imageinfo}.
#
#   $pms    - per-message status hash; its {imageinfo} entry is updated
#   $part   - message part; only $part->{'name'} is read here
#   $type   - 'gif', 'png' or 'jpeg' (selects pc_<type> / dems_<type>)
#   $height - image height in pixels
#   $width  - image width in pixels
#   $note   - optional text appended to the debug line
#
# Nothing is recorded unless both dimensions are non-zero.
my $record_image = sub {
  my ($pms, $part, $type, $height, $width, $note) = @_;
  return unless ($height && $width);

  my $name = $part->{'name'};
  my $area = $width * $height;
  $pms->{imageinfo}->{"pc_$type"} += $area;
  # dimension keys are always "<height>x<width>"
  $pms->{imageinfo}->{"dems_$type"}->{"${height}x${width}"} = 1;
  $pms->{imageinfo}->{names_all}->{$name} = 1 if $name;
  dbg("imageinfo: $type image ".($name ? $name : '')." is $height x $width pixels ($area pixels sq.)".(defined $note ? $note : ''));
  return;
};

# Dispatch table of per-format dimension parsers, keyed by image subtype.
# Each entry is called as $get_details{$type}->($pms, $part); it decodes
# the part, extracts width and height from the image header and records
# them through $record_image.  Parts that do not start with the expected
# file signature are silently ignored.
my %get_details = (
  'gif' => sub {
    my ($pms, $part) = @_;
    # 6 byte signature + 7 byte logical screen descriptor
    # NOTE(review): presumably decode(13) yields only the first 13 decoded
    # bytes - confirm against the message part API.
    my $header = $part->decode(13);
    return unless defined $header;
    # make sure this is actually a valid gif..
    return unless $header =~ s/^GIF(8[79]a)//;
    # little-endian width, height, then the packed flags byte
    my ($width, $height, $packed) = unpack("vvC", $header);
    # low three bits of the packed byte give the color table size exponent;
    # a truncated header is treated as flags == 0
    my $color_table_size = 1 << ((($packed || 0) & 0x07) + 1);
    # for future enhancements
    #my $global_color_table = $packed & 0x80;
    #my $has_global_color_table = $global_color_table ? 1 : 0;
    #my $sorted_colors = ($packed & 0x08)?1:0;
    #my $resolution = ((($packed & 0x70) >> 4) + 1);
    $record_image->($pms, $part, 'gif', $height, $width,
                    ", with $color_table_size color table");
  },
  'png' => sub {
    my ($pms, $part) = @_;
    my $data = $part->decode();
    return unless defined $data;
    return unless (substr($data, 0, 8) eq "\x89PNG\x0d\x0a\x1a\x0a");
    my $datalen = length $data;
    my $pos = 8;
    my $chunksize = 8;   # 4 byte payload length + 4 byte chunk type
    my ($width, $height) = ( 0, 0 );
    while ($pos + $chunksize <= $datalen) {
      my ($len, $type) = unpack("Na4", substr($data, $pos, $chunksize));
      last unless defined $type;   # truncated chunk header
      $pos += $chunksize;
      last if $type eq "IEND";     # end of png image.
      if ($type eq "IHDR" && $len == 13) {
        # IHDR payload starts with big-endian width and height
        ($width, $height) = unpack("NN", substr($data, $pos, $len));
        last;
      }
      # skip this chunk's payload and its 4 byte CRC so that the next
      # iteration starts on a real chunk header
      $pos += $len + 4;
    }
    $record_image->($pms, $part, 'png', $height, $width);
  },
  'jpeg' => sub {
    my ($pms, $part) = @_;
    my $data = $part->decode();
    return unless defined $data;
    return unless substr($data, 0, 2) eq "\xFF\xD8";
    my $datalen = length $data;
    my $pos = 2;
    my $chunksize = 4;   # 0xFF, marker byte, 2 byte segment length
    my ($height, $width) = (0, 0);
    while ($pos < $datalen) {
      my ($xx, $mark, $len) = unpack("CCn", substr($data, $pos, $chunksize));
      last if (!defined $xx || $xx != 0xFF);
      last if (!defined $mark || $mark == 0xDA || $mark == 0xD9);
      # the length field counts its own two bytes
      last if (!defined $len || $len < 2);
      $pos += $chunksize;
      my $block = substr($data, $pos, $len - 2);
      # markers 0xC0-0xCF except 0xC4, 0xC8 and 0xCC carry the dimensions
      if ( ($mark >= 0xC0 && $mark <= 0xC3) || ($mark >= 0xC5 && $mark <= 0xC7) ||
           ($mark >= 0xC9 && $mark <= 0xCB) || ($mark >= 0xCD && $mark <= 0xCF) ) {
        # precision byte, then big-endian height and width
        (undef, $height, $width) = unpack("Cnn", substr($block, 0, 5));
        last;
      }
      $pos += length($block);
    }
    $record_image->($pms, $part, 'jpeg', $height, $width);
  },
);
# Scans the message for image parts and fills $pms->{imageinfo}.
#
#   $self - plugin object (unused here)
#   $pms  - per-message status hash; $pms->{msg}->find_parts() supplies
#           the candidate parts
#
# For every known type (and for 'all') this sets pc_<type> (summed pixel
# area) and count_<type>; the parsers in %get_details add dems_<type> and
# names_all.  Only parts with a base64 content-transfer-encoding are
# examined.  image/jpg and image/pjpeg are handled by the jpeg parser.
sub _get_images {
  my ($self,$pms) = @_;

  # start every counter at zero so later "+=" never sees undef
  foreach my $type ( 'all', keys %get_details ) {
    $pms->{'imageinfo'}->{"pc_$type"} = 0;
    $pms->{'imageinfo'}->{"count_$type"} = 0;
  }

  foreach my $p ($pms->{msg}->find_parts(qr@^image/(?:gif|png|jpe?g|pjpeg)$@, 1)) {
    # make sure its base64 encoded
    my $cte = lc($p->get_header('content-transfer-encoding') || '');
    next if ($cte !~ /^base64$/);

    my ($type) = $p->{'type'} =~ m@/(\w+)$@;
    next unless defined $type;
    # jpg and pjpeg (progressive jpeg) share the jpeg parser and counters
    $type = 'jpeg' if ($type eq 'jpg' || $type eq 'pjpeg');

    if (exists $get_details{$type}) {
      $get_details{$type}->($pms,$p);
      $pms->{'imageinfo'}->{"count_$type"} ++;
    }
  }

  foreach my $name ( keys %{$pms->{'imageinfo'}->{"names_all"}} ) {
    dbg("imageinfo: image name $name found");
  }

  # fold the per-type results into the 'all' totals
  foreach my $type ( keys %get_details ) {
    $pms->{'imageinfo'}->{'pc_all'} += $pms->{'imageinfo'}->{"pc_$type"};
    $pms->{'imageinfo'}->{'count_all'} += $pms->{'imageinfo'}->{"count_$type"};
    foreach my $dem ( keys %{$pms->{'imageinfo'}->{"dems_$type"}} ) {
      dbg("imageinfo: adding $dem to dems_all");
      $pms->{'imageinfo'}->{'dems_all'}->{$dem} = 1;
    }
  }

  return;
}
# -----------------------------------------
# Eval rule: true when the message contains an image with exactly the
# given name.
#
#   $self - plugin object
#   $pms  - per-message status hash
#   $body - body lines (unused)
#   $name - image name to look for
#
# Returns 1 on a match, 0 otherwise (including when $name is undefined).
sub image_named {
  my ($self, $pms, $body, $name) = @_;
  return 0 if !defined $name;

  # collect the image data on first use
  $self->_get_images($pms) unless exists $pms->{'imageinfo'};

  my $found = exists $pms->{'imageinfo'}->{"names_all"}
           && exists $pms->{'imageinfo'}->{"names_all"}->{$name};
  return $found ? 1 : 0;
}
# -----------------------------------------
# Eval rule: true when any image name in the message matches a regex.
#
#   $self - plugin object
#   $pms  - per-message status hash
#   $body - body lines (unused)
#   $re   - the regex, either in the documented '/pattern/flags' form
#           (e.g. '/^\w{2,9}\.\.gif$/i') or as a bare pattern
#
# Returns 1 on the first matching name, 0 otherwise.  A regex that does
# not compile is logged once and yields 0.
sub image_name_regex {
  my ($self,$pms,$body,$re) = @_;
  return 0 unless (defined $re);

  # make sure we have image data read in.
  if (!exists $pms->{'imageinfo'}) {
    $self->_get_images($pms);
  }

  return 0 unless (exists $pms->{'imageinfo'}->{"names_all"});

  # The rule passes the regex as one string.  Strip the surrounding
  # slashes and pull out the trailing flags; interpolating the whole
  # string would make them literal pattern text.
  my ($pattern, $flags) = ($re, '');
  if ($re =~ m{\A/(.*)/([a-z]*)\z}s) {
    ($pattern, $flags) = ($1, $2);
    $flags =~ tr/imsx//cd;   # keep only flags that can be applied inline
  }
  # the non-empty wrapper also avoids perl's special-casing of an empty pattern
  my $source = length($flags) ? "(?$flags)$pattern" : "(?:$pattern)";

  # compile once; a broken pattern dies inside the eval
  my $compiled = eval { qr/$source/ };
  if (!defined $compiled) {
    dbg("imageinfo: error in regex /$re/ - $@");
    return 0;
  }

  foreach my $name (keys %{$pms->{'imageinfo'}->{"names_all"}}) {
    dbg("imageinfo: checking image named $name against regex $re");
    if ($name =~ $compiled) {
      dbg("imageinfo: image_name_regex hit on $name");
      return 1;
    }
  }
  return 0;
}
# -----------------------------------------
# Eval rule: checks how many images of a given type the message holds.
#
#   $self - plugin object
#   $pms  - per-message status hash
#   $body - body lines (unused)
#   $type - 'all', 'gif', 'png' or 'jpeg'
#   $min  - required, lowest acceptable image count
#   $max  - optional, highest acceptable image count
#
# Returns the result of result_check() on count_<type>, or 0 when $type
# or $min is undefined.
sub image_count {
  my ($self,$pms,$body,$type,$min,$max) = @_;
  # same argument guard as the other evals in this file
  return 0 unless (defined $type && defined $min);

  # make sure we have image data read in.
  if (!exists $pms->{'imageinfo'}) {
    $self->_get_images($pms);
  }

  # dbg("imageinfo: count: $min, ".($max ? $max:'').", $type, ".$pms->{'imageinfo'}->{"count_$type"});
  return result_check($min, $max, $pms->{'imageinfo'}->{"count_$type"});
}
# -----------------------------------------
# Eval rule: checks the summed pixel area of images of a given type.
#
#   $self - plugin object
#   $pms  - per-message status hash
#   $body - body lines (unused)
#   $type - 'all', 'gif', 'png' or 'jpeg'
#   $min  - required, lowest acceptable pixel area
#   $max  - optional, highest acceptable pixel area
#
# Returns the result of result_check() on pc_<type>, or 0 when $type or
# $min is undefined.
sub pixel_coverage {
  my ($self, $pms, $body, $type, $min, $max) = @_;
  if (!defined $type || !defined $min) {
    return 0;
  }

  # collect the image data on first use
  $self->_get_images($pms) unless exists $pms->{'imageinfo'};

  my $covered = $pms->{'imageinfo'}->{"pc_$type"};
  return result_check($min, $max, $covered);
}
# -----------------------------------------
# Eval rule: compares the amount of text to the image pixel area.
#
#   $self - plugin object
#   $pms  - per-message status hash
#   $body - array ref of body lines; their joined length is the text size
#   $type - 'all', 'gif', 'png' or 'jpeg'
#   $min  - required, lowest acceptable ratio
#   $max  - required, upper bound; a ratio equal to $max does not hit
#
# The ratio is text length divided by pc_<type>.  Returns 0 when an
# argument is missing, when there is no text, or when no pixel area was
# recorded for $type; otherwise the result of result_check().
sub image_to_text_ratio {
  my ($self, $pms, $body, $type, $min, $max) = @_;
  foreach my $required ($type, $min, $max) {
    return 0 unless defined $required;
  }

  # collect the image data on first use
  $self->_get_images($pms) unless exists $pms->{'imageinfo'};

  # depending on how you call this eval (body vs rawbody),
  # the $textlen will differ.
  my $textlen = length(join('', @$body));
  return 0 if !($textlen > 0);

  my $pc_key = "pc_$type";
  return 0 if !exists $pms->{'imageinfo'}->{$pc_key};
  my $pixels = $pms->{'imageinfo'}->{$pc_key};
  return 0 if !($pixels > 0);

  my $ratio = $textlen / $pixels;
  dbg("imageinfo: image ratio=$ratio, min=$min max=$max");
  return result_check($min, $max, $ratio, 1);
}
# -----------------------------------------
# Eval rule: true when an image of the given type has exactly the given
# dimensions.
#
#   $self   - plugin object
#   $pms    - per-message status hash
#   $body   - body lines (unused)
#   $type   - 'all', 'gif', 'png' or 'jpeg'
#   $height - required height in pixels
#   $width  - required width in pixels
#
# Returns 1 on a match, 0 otherwise (including missing arguments).
sub image_size_exact {
  my ($self, $pms, $body, $type, $height, $width) = @_;
  return 0 if grep { !defined $_ } ($type, $height, $width);

  # collect the image data on first use
  $self->_get_images($pms) unless exists $pms->{'imageinfo'};

  my $dems_key = "dems_$type";
  return 0 if !exists $pms->{'imageinfo'}->{$dems_key};

  # recorded dimensions are keyed as "<height>x<width>"
  my $wanted = join('x', $height, $width);
  return exists $pms->{'imageinfo'}->{$dems_key}->{$wanted} ? 1 : 0;
}
# -----------------------------------------
# Eval rule: true when an image of the given type has dimensions inside
# the given bounds.
#
#   $self  - plugin object
#   $pms   - per-message status hash
#   $body  - body lines (unused)
#   $type  - 'all', 'gif', 'png' or 'jpeg'
#   $minh  - required, minimum height
#   $minw  - required, minimum width
#   $maxh  - optional, maximum height
#   $maxw  - optional, maximum width
#
# Returns 1 as soon as one recorded size fits, 0 otherwise.
sub image_size_range {
  my ($self, $pms, $body, $type, $minh, $minw, $maxh, $maxw) = @_;
  return 0 if (!defined $type || !defined $minh || !defined $minw);

  # collect the image data on first use
  $self->_get_images($pms) unless exists $pms->{'imageinfo'};

  my $dems_key = 'dems_'.$type;
  return 0 if !exists $pms->{'imageinfo'}->{$dems_key};

  foreach my $dem ( keys %{$pms->{'imageinfo'}->{$dems_key}} ) {
    # keys are stored as "<height>x<width>"
    my ($h, $w) = split(/x/, $dem);
    my $too_small = ($h < $minh) || ($w < $minw);
    my $too_large = (defined $maxh && $h > $maxh)
                 || (defined $maxw && $w > $maxw);
    return 1 unless ($too_small || $too_large);
  }

  return 0;
}
# -----------------------------------------
# Range test shared by the eval rules.
#
#   $min        - lower bound (inclusive)
#   $max        - optional upper bound (inclusive unless $nomaxequal)
#   $value      - value to test; undef never passes
#   $nomaxequal - when true, a value equal to $max does not pass
#
# Returns 1 when $value lies inside the range, 0 otherwise.
sub result_check {
  my ($min, $max, $value, $nomaxequal) = @_;
  return 0 unless defined $value;
  return 0 if ($value < $min);
  # both upper-bound tests only make sense when a maximum was supplied;
  # comparing against an undefined $max would reject a value of 0
  if (defined $max) {
    return 0 if ($value > $max);
    return 0 if ($nomaxequal && $value == $max);
  }
  return 1;
}
# -----------------------------------------
1;