—package
EBook::Ishmael::ImageID;
use
5.016;
our
$VERSION
=
'1.05'
;
use
strict;
use
warnings;
our
@EXPORT
=
qw(image_id image_size)
;
use
XML::LibXML;
my
%MAGIC
= (
pack
(
"C*"
, 0xff, 0xd8, 0xff) =>
'jpg'
,
pack
(
"C*"
, 0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a) =>
'png'
,
pack
(
"C*"
, 0x47, 0x49, 0x46, 0x38) =>
'gif'
,
pack
(
"C*"
, 0x52, 0x49, 0x46, 0x46) =>
'webp'
,
pack
(
"C*"
, 0x42, 0x4d) =>
'bmp'
,
pack
(
"C*"
, 0x49, 0x49) =>
'tif'
,
pack
(
"C*"
, 0x4d, 0x4d) =>
'tif'
,
);
# File formats that do not have magic bytes, use a subroutine instead.
my
%NONMAGIC
= (
'svg'
=>
sub
{
substr
(${
$_
[0] }, 0, 1024) =~ /<\s
*svg
[^<>]*>/
},
);
# This function may not support many image formats as it was designed for
# getting image sizes for CHM files to determine the cover image. CHMs
# primarily use GIFs.
# TODO: Add tif support
# TODO: Add webp support (probably never)
my
%SIZE
= (
# size stored as two BE ushorts in the SOF0 marker, at offset 5.
'jpg'
=>
sub
{
my
$ref
=
shift
;
my
$len
=
length
$$ref
;
my
$p
= 2;
my
$sof
=
join
' '
, 0xff, 0xc0;
while
(
$p
<
$len
) {
my
$id
=
join
' '
,
unpack
"CC"
,
substr
$$ref
,
$p
, 2;
$p
+= 2;
my
$mlen
=
unpack
"n"
,
substr
$$ref
,
$p
, 2;
unless
(
$id
eq
$sof
) {
$p
+=
$mlen
;
next
;
}
my
(
$y
,
$x
) =
unpack
"nn"
,
substr
$$ref
,
$p
+ 3, 4;
return
[
$x
,
$y
];
}
return
undef
;
},
# size stored as two BE ulongs at offset 16
'png'
=>
sub
{
my
$ref
=
shift
;
return
undef
unless
length
$$ref
> 24;
my
(
$x
,
$y
) =
unpack
"N N"
,
substr
$$ref
, 16, 8;
return
[
$x
,
$y
];
},
# size stored as two LE ushorts at offset 6
'gif'
=>
sub
{
my
$ref
=
shift
;
return
undef
unless
length
$$ref
> 10;
my
(
$x
,
$y
) =
unpack
"v v"
,
substr
$$ref
, 6, 4;
return
[
$x
,
$y
];
},
# size storage depends on header. For an OS header, two LE ushorts at
# offset 18. For Windows, two LE signed longs at offset 18.
'bmp'
=>
sub
{
my
$ref
=
shift
;
return
undef
unless
length
$$ref
> 24;
my
$dbisize
=
unpack
"V"
,
substr
$$ref
, 14, 4;
my
(
$x
,
$y
);
# OS
if
(
$dbisize
== 16) {
(
$x
,
$y
) =
unpack
"v v"
,
substr
$$ref
, 18, 4;
# Win
}
else
{
(
$x
,
$y
) =
unpack
"(ll)<"
,
substr
$$ref
, 18, 8;
return
undef
if
$x
< 0 or
$y
< 0;
}
return
[
$x
,
$y
];
},
# Get width and height attributes of root node.
'svg'
=>
sub
{
my
$ref
=
shift
;
my
$dom
;
eval
{
$dom
= XML::LibXML->load_xml(
string
=>
$ref
);
1;
} or
return
undef
;
my
$svg
=
$dom
->documentElement;
my
$x
=
$svg
->getAttribute(
'width'
) or
return
undef
;
my
$y
=
$svg
->getAttribute(
'height'
) or
return
undef
;
return
[
$x
,
$y
];
},
);
sub
image_id {
my
$ref
=
shift
;
my
$sublen
= max
map
{
length
}
keys
%MAGIC
;
my
$mag
=
substr
$$ref
, 0,
$sublen
;
for
my
$m
(
keys
%MAGIC
) {
return
$MAGIC
{
$m
}
if
$mag
=~ /^\Q
$m
\E/;
}
for
my
$nm
(
keys
%NONMAGIC
) {
return
$nm
if
$NONMAGIC
{
$nm
}->(
$ref
);
}
return
undef
;
}
sub
image_size {
my
$ref
=
shift
;
my
$fmt
=
shift
// image_id(
$ref
);
unless
(
defined
$fmt
) {
die
"Could not determine image data format\n"
;
}
unless
(
exists
$SIZE
{
$fmt
}) {
return
undef
;
}
return
$SIZE
{
$fmt
}->(
$ref
);
}
1;
=head1 NAME
EBook::Ishmael::ImageID - Identify image data format
=head1 SYNOPSIS
use EBook::Ishmael::ImageID;
my $format = image_id($dataref);
=head1 DESCRIPTION
B<EBook::Ishmael::ImageID> is a module that provides the C<image_id()>
subroutine, which identifies the image format of a given buffer. This is
developer documentation, for L<ishmael> user documentation you should consult
its manual.
Currently, the following formats are supported:
=over 4
=item jpg
=item png
=item gif
=item webp
=item bmp
=item tif
=item svg
=back
=head1 SUBROUTINES
=over 4
=item $f = image_id($dataref)
Returns a string of the image format of the given image buffer. C<$dataref>
must be a scalar ref. Returns C<undef> if the image's format could not be
identified.
=item [$x, $y] = image_size($dataref, [$fmt])
Returns an C<$x>/C<$y> pair representing the image data's size. C<$fmt> is an
optional argument specifying the format to use for the image data. If not
specified, C<image_size> will identify the format itself. If the image size
could not be determined, returns C<undef>.
This subroutine does not support the following formats (yet):
=over 4
=item webp
=item tif
=back
=back
=head1 AUTHOR
Written by Samuel Young, E<lt>samyoung12788@gmail.comE<gt>.
This project's source can be found on its
L<Codeberg Page|https://codeberg.org/1-1sam/ishmael>. Comments and pull
requests are welcome!
=head1 COPYRIGHT
Copyright (C) 2025 Samuel Young
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
=cut