The Perl Toolchain Summit 2025 Needs You: You can help 🙏 Learn more

use 5.016;
our $VERSION = '1.05';
use strict;
use Exporter 'import';
our @EXPORT = qw(image_id image_size);
use List::Util qw(max);
my %MAGIC = (
pack("C*", 0xff, 0xd8, 0xff) => 'jpg',
pack("C*", 0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a) => 'png',
pack("C*", 0x47, 0x49, 0x46, 0x38) => 'gif',
pack("C*", 0x52, 0x49, 0x46, 0x46) => 'webp',
pack("C*", 0x42, 0x4d) => 'bmp',
pack("C*", 0x49, 0x49) => 'tif',
pack("C*", 0x4d, 0x4d) => 'tif',
);
# File formats that do not have magic bytes, use a subroutine instead.
my %NONMAGIC = (
'svg' => sub {
substr(${ $_[0] }, 0, 1024) =~ /<\s*svg[^<>]*>/
},
);
# This function may not support many image formats as it was designed for
# getting image sizes for CHM files to determine the cover image. CHMs
# primarily use GIFs.
# TODO: Add tif support
# TODO: Add webp support (probably never)
my %SIZE = (
# size stored as two BE ushorts in the SOF0 marker, at offset 5.
'jpg' => sub {
my $ref = shift;
my $len = length $$ref;
my $p = 2;
my $sof = join ' ', 0xff, 0xc0;
while ($p < $len) {
my $id = join ' ', unpack "CC", substr $$ref, $p, 2;
$p += 2;
my $mlen = unpack "n", substr $$ref, $p, 2;
unless ($id eq $sof) {
$p += $mlen;
next;
}
my ($y, $x) = unpack "nn", substr $$ref, $p + 3, 4;
return [ $x, $y ];
}
return undef;
},
# size stored as two BE ulongs at offset 16
'png' => sub {
my $ref = shift;
return undef unless length $$ref > 24;
my ($x, $y) = unpack "N N", substr $$ref, 16, 8;
return [ $x, $y ];
},
# size stored as two LE ushorts at offset 6
'gif' => sub {
my $ref = shift;
return undef unless length $$ref > 10;
my ($x, $y) = unpack "v v", substr $$ref, 6, 4;
return [ $x, $y ];
},
# size storage depends on header. For an OS header, two LE ushorts at
# offset 18. For Windows, two LE signed longs at offset 18.
'bmp' => sub {
my $ref = shift;
return undef unless length $$ref > 24;
my $dbisize = unpack "V", substr $$ref, 14, 4;
my ($x, $y);
# OS
if ($dbisize == 16) {
($x, $y) = unpack "v v", substr $$ref, 18, 4;
# Win
} else {
($x, $y) = unpack "(ll)<", substr $$ref, 18, 8;
return undef if $x < 0 or $y < 0;
}
return [ $x, $y ];
},
# Get width and height attributes of root node.
'svg' => sub {
my $ref = shift;
my $dom;
eval {
$dom = XML::LibXML->load_xml(string => $ref);
1;
} or return undef;
my $svg = $dom->documentElement;
my $x = $svg->getAttribute('width') or return undef;
my $y = $svg->getAttribute('height') or return undef;
return [ $x, $y ];
},
);
sub image_id {
my $ref = shift;
my $sublen = max map { length } keys %MAGIC;
my $mag = substr $$ref, 0, $sublen;
for my $m (keys %MAGIC) {
return $MAGIC{ $m } if $mag =~ /^\Q$m\E/;
}
for my $nm (keys %NONMAGIC) {
return $nm if $NONMAGIC{ $nm }->($ref);
}
return undef;
}
sub image_size {
my $ref = shift;
my $fmt = shift // image_id($ref);
unless (defined $fmt) {
die "Could not determine image data format\n";
}
unless (exists $SIZE{ $fmt }) {
return undef;
}
return $SIZE{ $fmt }->($ref);
}
1;
=head1 NAME
EBook::Ishmael::ImageID - Identify image data format
=head1 SYNOPSIS
use EBook::Ishmael::ImageID;
my $format = image_id($dataref);
=head1 DESCRIPTION
B<EBook::Ishmael::ImageID> is a module that provides the C<image_id()>
subroutine, which identifies the image format of a given buffer. This is
developer documentation, for L<ishmael> user documentation you should consult
its manual.
Currently, the following formats are supported:
=over 4
=item jpg
=item png
=item gif
=item webp
=item bmp
=item tif
=item svg
=back
=head1 SUBROUTINES
=over 4
=item $f = image_id($dataref)
Returns a string of the image format of the given image buffer. C<$dataref>
must be a scalar ref. Returns C<undef> if the image's format could not be
identified.
=item [$x, $y] = image_size($dataref, [$fmt])
Returns an C<$x>/C<$y> pair representing the image data's size. C<$fmt> is an
optional argument specifying the format to use for the image data. If not
specified, C<image_size> will identify the format itself. If the image size
could not be determined, returns C<undef>.
This subroutine does not support the following formats (yet):
=over 4
=item webp
=item tif
=back
=back
=head1 AUTHOR
Written by Samuel Young, E<lt>samyoung12788@gmail.comE<gt>.
This project's source can be found on its
L<Codeberg Page|https://codeberg.org/1-1sam/ishmael>. Comments and pull
requests are welcome!
=head1 COPYRIGHT
Copyright (C) 2025 Samuel Young
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
=cut