package EBook::Ishmael::MobiHuff;
use 5.016;
our $VERSION = '1.06';
use strict;
use warnings;

use List::Util qw(max min);

# True when this perl's pack/unpack understand the 64-bit "Q>" template,
# which decode() uses to read its bit window.
our $UNPACK_Q = !!eval { pack "Q>", 1 };

# Many thanks to Calibre, much of the code in this module was based on their
# huffman decoder.

# Expected first 8 bytes of a HUFF / CDIC record: the 4-byte tag followed by a
# big-endian 32-bit value.
my $HUFF_HDR = pack "A4 N", 'HUFF', 24;
my $CDIC_HDR = pack "A4 N", 'CDIC', 16;

# _load_huff($huff)
#
# Parses the HUFF record and fills in the decoding tables:
#   dict1   - 256 entries of [ code length, terminal flag, max code ], indexed
#             by the top byte of the 32-bit code window.
#   mincode - per code length, the smallest code left-aligned to 32 bits.
#   maxcode - per code length, the largest code left-aligned to 32 bits, with
#             the unused low bits set.
#
# Dies with "Invalid MOBI HUFF header\n" or "Invalid MOBI HUFF dictionary\n"
# when the record is malformed or too short to hold its tables.
# Returns 1 on success.
sub _load_huff {

    my $self = shift;
    my $huff = shift;

    unless (
        defined $huff
        and length($huff) >= 16
        and substr($huff, 0, 8) eq $HUFF_HDR
    ) {
        die "Invalid MOBI HUFF header\n";
    }

    # Byte offsets of the two tables inside the record.
    my ($off1, $off2) = unpack "N N", substr $huff, 8, 8;

    # Both tables must lie completely inside the record, otherwise unpack
    # would silently hand back short lists and decode() would fail much later
    # with an unrelated error.
    if (
        $off1 + 4 * 256 > length($huff)
        or $off2 + 4 * 64 > length($huff)
    ) {
        die "Invalid MOBI HUFF dictionary\n";
    }

    my @dict1;
    for my $v (unpack "N256", substr $huff, $off1, 4 * 256) {
        my $len  = $v & 0x1f;
        my $term = $v & 0x80;
        my $max  = $v >> 8;

        if ($len == 0) {
            die "Invalid MOBI HUFF dictionary\n";
        }
        # Codes that fit in the 8-bit lookup index must be terminal.
        if ($len <= 8 and !$term) {
            die "Invalid MOBI HUFF dictionary\n";
        }

        # Left-align the max code to 32 bits and set every bit below it.
        push @dict1, [ $len, $term, (($max + 1) << (32 - $len)) - 1 ];
    }
    @{ $self->{dict1} } = @dict1;

    # The second table interleaves (min, max) pairs for code lengths 1..32.
    # A leading 0 is added so that the array index equals the code length.
    my @dict2 = unpack "N64", substr $huff, $off2, 4 * 64;
    my @mins  = (0, @dict2[ grep { $_ % 2 == 0 } 0 .. $#dict2 ]);
    my @maxs  = (0, @dict2[ grep { $_ % 2 != 0 } 0 .. $#dict2 ]);

    $self->{mincode} = [
        map { $mins[$_] << (32 - $_) } 0 .. $#mins
    ];
    $self->{maxcode} = [
        map { (($maxs[$_] + 1) << (32 - $_)) - 1 } 0 .. $#maxs
    ];

    return 1;

}

# _load_cdic($cdic)
#
# Parses one CDIC record and appends its phrases to $self->{dictionary}.
# Each dictionary entry is [ phrase bytes, flag ]; a false flag means the
# phrase is itself still encoded and is decoded lazily by decode().
#
# Dies with "Invalid MOBI CDIC header\n" or "Invalid MOBI CDIC dictionary\n"
# when the record is malformed or truncated. The dictionary is only modified
# when the whole record parsed successfully.
# Returns 1 on success.
sub _load_cdic {

    my $self = shift;
    my $cdic = shift;

    unless (
        defined $cdic
        and length($cdic) >= 16
        and substr($cdic, 0, 8) eq $CDIC_HDR
    ) {
        die "Invalid MOBI CDIC header\n";
    }

    my ($phrases, $bits) = unpack "N N", substr $cdic, 8, 8;

    # Number of phrases in this record: at most 2**bits, and never more than
    # what is still missing from the total phrase count.
    my $n = min(1 << $bits, $phrases - @{ $self->{dictionary} });
    if ($n < 0) {
        die "Invalid MOBI CDIC header\n";
    }

    # Phrase offsets, relative to byte 16 of the record.
    my @offsets = unpack "x16 n$n", $cdic;
    if (@offsets != $n) {
        die "Invalid MOBI CDIC dictionary\n";
    }

    my @entries;
    for my $off (@offsets) {
        # The 2-byte length word has to be inside the record.
        if (18 + $off > length($cdic)) {
            die "Invalid MOBI CDIC dictionary\n";
        }
        # Read exactly the length word instead of copying the record's tail.
        my $blen = unpack "n", substr $cdic, 16 + $off, 2;
        push @entries, [
            substr($cdic, 18 + $off, $blen & 0x7fff),
            $blen & 0x8000,
        ];
    }

    push @{ $self->{dictionary} }, @entries;

    return 1;

}

# new($huff, @cdic)
#
# Builds a decoder from the HUFF record data and the list of CDIC record
# data. Dies if any record is invalid (see _load_huff and _load_cdic).
sub new {

    my $class = shift;
    my $huff  = shift;
    my @cdic  = @_;

    my $self = {
        dict1      => [],
        dictionary => [],
        mincode    => [],
        maxcode    => [],
    };

    bless $self, $class;

    $self->_load_huff($huff);
    for my $c (@cdic) {
        $self->_load_cdic($c);
    }

    return $self;

}

# decode($data)
#
# Decodes a Huff/CDIC-compressed byte string and returns the decoded bytes.
# An undefined $data is treated as the empty string. Phrases that are stored
# encoded in the dictionary are decoded recursively on first use and cached.
#
# Dies with "Invalid MOBI HUFF data\n" when a code does not resolve to a
# dictionary entry.
sub decode {

    my $self = shift;
    my $data = shift;

    $data = '' unless defined $data;

    my $dict1      = $self->{dict1};
    my $mincode    = $self->{mincode};
    my $maxcode    = $self->{maxcode};
    my $dictionary = $self->{dictionary};

    # Number of input bits not consumed yet.
    my $left = length($data) * 8;
    # Pad so that the 8-byte window reads below never run off the string.
    $data .= "\x00" x 8;

    my $pos = 0;
    my $x   = unpack "Q>", $data;
    # Number of bits of $x that lie below the current 32-bit code window.
    my $n   = 32;
    my $s   = '';

    while (1) {

        # Slide the 64-bit window forward by 4 bytes once the upper half has
        # been consumed.
        if ($n <= 0) {
            $pos += 4;
            $x = unpack "Q>", substr $data, $pos, 8;
            $n += 32;
        }

        my $code = ($x >> $n) & 0xffff_ffff;

        my ($len, $term, $max) = @{ $dict1->[$code >> 24] };
        unless ($term) {
            # Find the code length by walking the per-length minimum codes.
            # The walk is bounded by the table size; zero padding at the end
            # of the input may legitimately run off the table, in which case
            # the $left check below ends the loop.
            while ($len < @$mincode and $code < $mincode->[$len]) {
                $len++;
            }
            $max = $maxcode->[$len];
        }

        $n    -= $len;
        $left -= $len;
        last if $left < 0;

        # From here on the code is made of real input bits, so it must be
        # valid.
        if ($len >= @$mincode) {
            die "Invalid MOBI HUFF data\n";
        }

        my $r = ($max - $code) >> (32 - $len);
        my $entry = $r < @$dictionary ? $dictionary->[$r] : undef;
        unless ($entry) {
            die "Invalid MOBI HUFF data\n";
        }

        my ($slice, $flag) = @$entry;
        unless ($flag) {
            # Placeholder while the phrase is being decoded: a phrase that
            # refers back to itself expands to nothing at that point instead
            # of recursing without end.
            $dictionary->[$r] = [ '', 1 ];
            $slice = $self->decode($slice);
            $dictionary->[$r] = [ $slice, 1 ];
        }

        $s .= $slice;

    }

    return $s;

}

1;

=head1 NAME

EBook::Ishmael::MobiHuff - Huff/CDIC decoder for MOBI/AZW

=head1 SYNOPSIS

  use EBook::Ishmael::MobiHuff;

  my $mh = EBook::Ishmael::MobiHuff->new($huff, @cdics);

  my $decode = $mh->decode($data);

=head1 DESCRIPTION

B<EBook::Ishmael::MobiHuff> is a module that provides an object-oriented
interface for decoding Huff/CDIC-encoded data found in MOBI/AZW ebooks. This
is developer documentation, please consult the L<ishmael> manual for user
documentation.

=head1 METHODS

=over 4

=item $mh = EBook::Ishmael::MobiHuff->new($huff, @cdics)

Returns a blessed C<EBook::Ishmael::MobiHuff> object and reads Huff/CDIC data
from C<$huff> and C<@cdics>. C<$huff> is the record data for the C<HUFF>
record, C<@cdics> is an array of record data for each C<CDIC> record.

Dies if any of the records is malformed or truncated.

=item $data = $mh->decode($encode)

Returns the decoded C<$data> from C<$encode>.

Dies if C<$encode> contains a code that does not resolve to a dictionary
entry.

=back

=head1 AUTHOR

Written by Samuel Young, E<lt>samyoung12788@gmail.comE<gt>.

This project's source can be found on its
L<Codeberg Page|https://codeberg.org/1-1sam/ishmael>. Comments and pull
requests are welcome!

=head1 COPYRIGHT

Copyright (C) 2025 Samuel Young

This program is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation, either version 3 of the License, or (at your option) any later
version.

=cut