package EBook::Ishmael::ImageID;
use 5.016;
our $VERSION = '1.05';
use strict;
use warnings;

use Exporter 'import';
our @EXPORT = qw(image_id image_size);

use List::Util qw(max);

use XML::LibXML;

my %MAGIC = (
	pack("C*", 0xff, 0xd8, 0xff)                               => 'jpg',
	pack("C*", 0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a) => 'png',
	pack("C*", 0x47, 0x49, 0x46, 0x38)                         => 'gif',
	pack("C*", 0x52, 0x49, 0x46, 0x46)                         => 'webp',
	pack("C*", 0x42, 0x4d)                                     => 'bmp',
	pack("C*", 0x49, 0x49)                                     => 'tif',
	pack("C*", 0x4d, 0x4d)                                     => 'tif',
);

# File formats that do not have magic bytes, use a subroutine instead.
my %NONMAGIC = (
	'svg' => sub {
		substr(${ $_[0] }, 0, 1024) =~ /<\s*svg[^<>]*>/
	},
);

# This function may not support many image formats as it was designed for
# getting image sizes for CHM files to determine the cover image. CHMs
# primarily use GIFs.
# TODO: Add tif support
# TODO: Add webp support (probably never)
my %SIZE = (
	# size stored as two BE ushorts in the SOF0 marker, at offset 5.
	'jpg' => sub {

		my $ref = shift;

		my $len = length $$ref;

		my $p = 2;

		my $sof = join ' ', 0xff, 0xc0;

		while ($p < $len) {

			my $id = join ' ', unpack "CC", substr $$ref, $p, 2;
			$p += 2;
			my $mlen = unpack "n", substr $$ref, $p, 2;

			unless ($id eq $sof) {
				$p += $mlen;
				next;
			}

			my ($y, $x) = unpack "nn", substr $$ref, $p + 3, 4;

			return [ $x, $y ];

		}

		return undef;

	},
	# size stored as two BE ulongs at offset 16
	'png' => sub {

		my $ref = shift;

		return undef unless length $$ref > 24;

		my ($x, $y) = unpack "N N", substr $$ref, 16, 8;

		return [ $x, $y ];

	},
	# size stored as two LE ushorts at offset 6
	'gif' => sub {

		my $ref = shift;

		return undef unless length $$ref > 10;

		my ($x, $y) = unpack "v v", substr $$ref, 6, 4;

		return [ $x, $y ];

	},
	# size storage depends on header. For an OS header, two LE ushorts at
	# offset 18. For Windows, two LE signed longs at offset 18.
	'bmp' => sub {

		my $ref = shift;

		return undef unless length $$ref > 24;

		my $dbisize = unpack "V", substr $$ref, 14, 4;

		my ($x, $y);

		# OS
		if ($dbisize == 16) {
			($x, $y) = unpack "v v", substr $$ref, 18, 4;
		# Win
		} else {
			($x, $y) = unpack "(ll)<", substr $$ref, 18, 8;
			return undef if $x < 0 or $y < 0;
		}

		return [ $x, $y ];

	},
	# Get width and height attributes of root node.
	'svg' => sub {

		my $ref = shift;

		my $dom;

		eval {
			$dom = XML::LibXML->load_xml(string => $ref);
			1;
		} or return undef;

		my $svg = $dom->documentElement;

		my $x = $svg->getAttribute('width') or return undef;
		my $y = $svg->getAttribute('height') or return undef;

		return [ $x, $y ];

	},
);

sub image_id {

	my $ref = shift;

	my $sublen = max map { length } keys %MAGIC;

	my $mag = substr $$ref, 0, $sublen;

	for my $m (keys %MAGIC) {
		return $MAGIC{ $m } if $mag =~ /^\Q$m\E/;
	}

	for my $nm (keys %NONMAGIC) {
		return $nm if $NONMAGIC{ $nm }->($ref);
	}

	return undef;

}

sub image_size {

	my $ref = shift;
	my $fmt = shift // image_id($ref);

	unless (defined $fmt) {
		die "Could not determine image data format\n";
	}

	unless (exists $SIZE{ $fmt }) {
		return undef;
	}

	return $SIZE{ $fmt }->($ref);

}

1;

=head1 NAME

EBook::Ishmael::ImageID - Identify image data format

=head1 SYNOPSIS

  use EBook::Ishmael::ImageID;

  my $format = image_id($dataref);

=head1 DESCRIPTION

B<EBook::Ishmael::ImageID> is a module that provides the C<image_id()>
subroutine, which identifies the image format of a given buffer. This is
developer documentation, for L<ishmael> user documentation you should consult
its manual.

Currently, the following formats are supported:

=over 4

=item jpg

=item png

=item gif

=item webp

=item bmp

=item tif

=item svg

=back

=head1 SUBROUTINES

=over 4

=item $f = image_id($dataref)

Returns a string of the image format of the given image buffer. C<$dataref>
must be a scalar ref. Returns C<undef> if the image's format could not be
identified.

=item [$x, $y] = image_size($dataref, [$fmt])

Returns an C<$x>/C<$y> pair representing the image data's size. C<$fmt> is an
optional argument specifying the format to use for the image data. If not
specified, C<image_size> will identify the format itself. If the image size
could not be determined, returns C<undef>.

This subroutine does not support the following formats (yet):

=over 4

=item webp

=item tif

=back

=back

=head1 AUTHOR

Written by Samuel Young, E<lt>samyoung12788@gmail.comE<gt>.

This project's source can be found on its
L<Codeberg Page|https://codeberg.org/1-1sam/ishmael>. Comments and pull
requests are welcome!

=head1 COPYRIGHT

Copyright (C) 2025 Samuel Young

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

=cut